diff --git a/packages/utils/src/emoji/cleanup.ts b/packages/utils/src/emoji/cleanup.ts
index 4f16aa2..1da68f8 100644
--- a/packages/utils/src/emoji/cleanup.ts
+++ b/packages/utils/src/emoji/cleanup.ts
@@ -5,7 +5,11 @@ import { emojiTones, joinerEmoji, vs16Emoji } from './data';
  * Get emoji sequence from string
  */
 export function getEmojiSequenceFromString(value: string): number[] {
-	return value.trim().split(/[\s-]/).map(getEmojiCodePoint);
+	return value
+		.trim()
+		.split(/[^0-9A-F]+/i)
+		.filter((item) => item.length > 0)
+		.map(getEmojiCodePoint);
 }
 
 /**
diff --git a/packages/utils/src/emoji/regex/create.ts b/packages/utils/src/emoji/regex/create.ts
index a09d103..203b337 100644
--- a/packages/utils/src/emoji/regex/create.ts
+++ b/packages/utils/src/emoji/regex/create.ts
@@ -24,6 +24,17 @@ export function createOptimisedRegexForEmojiSequences(
 
 /**
  * Create optimised regex for emojis
+ *
+ * First parameter is array of emojis, entry can be either list of
+ * code points or emoji sequence as a string
+ *
+ * Examples of acceptable strings (case insensitive):
+ *  '1F636 200D 1F32B FE0F' - space separated UTF32 sequence
+ *  '1f636-200d-1f32b-fe0f' - dash separated UTF32 sequence
+ *  'd83d-de36-200d-d83c-df2b-fe0f' - dash separated UTF16 sequence
+ *  '\\uD83D\\uDE36\\u200D\\uD83C\\uDF2B\\uFE0F' - UTF16 sequence escaped with '\\u'
+ *
+ * All examples above refer to the same emoji and will generate the same regex result
  */
 export function createOptimisedRegex(
 	emojis: (string | number[])[],
diff --git a/packages/utils/src/emoji/regex/parse.ts b/packages/utils/src/emoji/regex/parse.ts
new file mode 100644
index 0000000..e69de29
diff --git a/packages/utils/tests/emoji-cleanup-test.ts b/packages/utils/tests/emoji-cleanup-test.ts
index fad4c5c..d1f6f18 100644
--- a/packages/utils/tests/emoji-cleanup-test.ts
+++ b/packages/utils/tests/emoji-cleanup-test.ts
@@ -1,4 +1,5 @@
 /* eslint-disable @typescript-eslint/no-non-null-assertion */
+import { convertEmojiSequenceToUTF32 } from '../lib';
 import {
 	getEmojiSequenceFromString,
 	joinEmojiSequences,
@@ -16,6 +17,18 @@ describe('Testing formatting emoji cleanup', () => {
 		);
 		expect(sequence).toEqual([0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
 
+		// Various representations of the same sequence
+		expect(
+			getEmojiSequenceFromString('1f441-fe0f-200d-1f5e8-fe0f')
+		).toEqual(sequence);
+		expect(
+			convertEmojiSequenceToUTF32(
+				getEmojiSequenceFromString(
+					'\\uD83D\\uDC41\\uFE0F\\u200D\\uD83D\\uDDE8\\uFE0F'
+				)
+			)
+		).toEqual(sequence);
+
 		// Split
 		const split = splitEmojiSequences(sequence);
 		expect(split).toEqual([