/**
 * Convert an emoji sequence string or a keyword to a list of numbers
 *
 * If value contains only hexadecimal digits, dashes and whitespace, like
 * '1f441-fe0f', it is passed to getEmojiSequenceFromString(), which is
 * expected to return [0x1f441, 0xfe0f].
 *
 * If value contains any other character, it is treated as plain text, such as
 * the keyword ':cat:', and converted code point by code point.
 *
 * Limitation: a keyword made entirely of hexadecimal digits, dashes and
 * whitespace (for example 'face') cannot be told apart from a sequence and is
 * treated as a sequence.
 *
 * @param value Emoji sequence string or keyword
 * @returns List of code points
 */
export function getSequenceFromEmojiStringOrKeyword(value: string): number[] {
	// Only hexadecimal digits, dashes and whitespace: emoji sequence
	if (/^[0-9a-fA-F-\s]+$/.test(value)) {
		return getEmojiSequenceFromString(value);
	}

	// Anything else, including an empty string: convert character by character.
	// Iterating a string yields whole code points, so surrogate pairs stay together.
	const results: number[] = [];
	for (const char of value) {
		const code = char.codePointAt(0);
		// Compare with undefined instead of checking truthiness: U+0000 is a
		// valid code point and must not be mistaken for a failed conversion.
		// The check itself only exists to narrow `number | undefined`.
		if (code !== undefined) {
			results.push(code);
		}
	}
	return results;
}
/**
 * Convert number to string that can be used in a regular expression
 *
 * - 0-9, A-Z, '_', '`' and a-z are returned as is
 * - other printable ASCII characters (33-126) are escaped with a backslash
 * - other values up to 0xFF are returned as '\xHH'
 * - values up to 0xFFFF are returned as '\uHHHH', padded with zeroes
 * - values above 0xFFFF are returned as a UTF-16 surrogate pair, which is two
 *   '\uHHHH' escapes, because '\u' accepts exactly 4 hexadecimal digits
 *
 * @param number Character code
 * @returns Escaped string
 */
function toString(number: number): string {
	// Upper case hexadecimal string, padded with zeroes to required length
	const hex = (value: number, length: number): string => {
		const digits = value.toString(16).toUpperCase();
		return digits.length < length
			? '0'.repeat(length - digits.length) + digits
			: digits;
	};

	// 0xFF is included: '\uFF' is not a valid escape, '\xFF' is
	if (number <= 0xff) {
		if (number > 32 && number < 127) {
			// Printable character
			const char = String.fromCharCode(number);
			const isSafe =
				// 0-9
				(number > 47 && number < 58) ||
				// A-Z
				(number > 64 && number < 91) ||
				// _`a-z
				(number > 94 && number < 123);
			return isSafe ? char : '\\' + char;
		}
		return '\\x' + hex(number, 2);
	}

	if (number > 0xffff) {
		// Split into surrogate pair. This assumes regex is used without 'u' flag
		// and that pair is not placed inside a character set.
		const offset = number - 0x10000;
		return (
			'\\u' +
			hex(0xd800 + (offset >> 10), 4) +
			'\\u' +
			hex(0xdc00 + (offset & 0x3ff), 4)
		);
	}

	// Unicode, always 4 digits
	return '\\u' + hex(number, 4);
}
// Keywords, such as ':cat:', must be matched as plain text by generated regex
it('Keywords', () => {
	const keywords = [':cat:', ':gray_cat:', ':tabby_cat:'];
	const pattern = new RegExp(createOptimisedRegex(keywords), 'g');

	const content = `
Cat: :cat:
Tabby cat: :tabby_cat:
`;
	const found = content.match(pattern);

	// ':gray_cat:' is not in content, so only 2 keywords should be found
	expect(found?.length).toBe(2);
	expect(found?.[0]).toBe(':cat:');
	expect(found?.[1]).toBe(':tabby_cat:');
});