diff --git a/packages/utils/package.json b/packages/utils/package.json index e76d1ca..3ee8575 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -127,6 +127,16 @@ "import": "./lib/emoji/regex/tree.mjs", "types": "./lib/emoji/regex/tree.d.ts" }, + "./lib/emoji/replace/find": { + "require": "./lib/emoji/replace/find.cjs", + "import": "./lib/emoji/replace/find.mjs", + "types": "./lib/emoji/replace/find.d.ts" + }, + "./lib/emoji/replace/replace": { + "require": "./lib/emoji/replace/replace.cjs", + "import": "./lib/emoji/replace/replace.mjs", + "types": "./lib/emoji/replace/replace.d.ts" + }, "./lib/emoji/cleanup": { "require": "./lib/emoji/cleanup.cjs", "import": "./lib/emoji/cleanup.mjs", diff --git a/packages/utils/src/emoji/cleanup.ts b/packages/utils/src/emoji/cleanup.ts index 1da68f8..1d774cb 100644 --- a/packages/utils/src/emoji/cleanup.ts +++ b/packages/utils/src/emoji/cleanup.ts @@ -1,8 +1,14 @@ import { getEmojiCodePoint } from './convert'; import { emojiTones, joinerEmoji, vs16Emoji } from './data'; +import { getEmojiSequenceString } from './format'; /** * Get emoji sequence from string + * + * Examples (shows same emoji sequence formatted differently): + * '1F441 FE0F 200D 1F5E8 FE0F' => [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f] + * '1f441-fe0f-200d-1f5e8-fe0f' => [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f] + * '\\uD83D\\uDC41\\uFE0F\\u200D\\uD83D\\uDDE8\\uFE0F' => [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f] */ export function getEmojiSequenceFromString(value: string): number[] { return value @@ -13,7 +19,12 @@ export function getEmojiSequenceFromString(value: string): number[] { } /** - * Split sequence by joiner + * Split emoji sequence by joiner + * + * Result represents one emoji, split in smaller sequences separated by 0x200D + * + * Example: + * [0x1FAF1, 0x1F3FB, 0x200D, 0x1FAF2, 0x1F3FC] => [[0x1FAF1, 0x1F3FB], [0x1FAF2, 0x1F3FC]] */ export function splitEmojiSequences(sequence: number[]): number[][] { const results: 
/**
 * Types for mapEmojiSequences()
 */
type MapCallback = (sequence: number[]) => number[];

interface MapOptions {
	// Skip results for which callback returned an empty sequence
	removeEmpty?: boolean;

	// Skip results that are identical to a result that was already added
	removeDuplicates?: boolean;
}

const mapOptions: Required<MapOptions> = {
	removeEmpty: true,
	removeDuplicates: false,
};

/**
 * Run function on sequences
 *
 * Calls `callback` once for every sequence, in order, and collects the
 * returned sequences.
 *
 * @param sequences - List of emoji sequences
 * @param callback - Function that returns new sequence for a sequence
 * @param options - Filtering options. A boolean is also accepted and treated
 *   as `removeEmpty`, which matches the parameter this function had before
 *   options were introduced.
 * @returns Results of callback, filtered according to options
 */
export function mapEmojiSequences(
	sequences: number[][],
	callback: MapCallback,
	options: MapOptions | boolean = {}
): number[][] {
	const fullOptions = {
		...mapOptions,
		...(typeof options === 'boolean' ? { removeEmpty: options } : options),
	};

	// String versions of sequences that were already added to results
	const values = new Set<string>();
	const results: number[][] = [];

	for (const sequence of sequences) {
		const result = callback(sequence);

		// Check for empty sequences
		if (fullOptions.removeEmpty && !result.length) {
			continue;
		}

		// Check for duplicate: sequences are compared by their string form
		if (fullOptions.removeDuplicates) {
			const value = getEmojiSequenceString(result);
			if (values.has(value)) {
				continue;
			}
			values.add(value);
		}

		results.push(result);
	}

	return results;
}
/**
 * Get qualified variations from parsed test file
 *
 * Key is unqualified emoji, value is longest fully qualified emoji. "Longest"
 * is measured on the string returned by the converter, not on the number of
 * code points.
 *
 * Without `toString`, result is a Map of sequences. Keys of that Map are newly
 * created arrays, so entries can be iterated, but cannot be looked up with
 * `get()` using another array that has the same content.
 *
 * With `toString`, result is a prototype-less object where both keys and
 * values are strings generated by `toString`.
 *
 * @param sequences - Sequences from parsed test file
 * @param toString - Optional function to convert sequence to string
 */
export function getQualifiedEmojiSequencesMap(
	sequences: number[][]
): Map<number[], number[]>;
export function getQualifiedEmojiSequencesMap(
	sequences: number[][],
	toString: (value: number[]) => string
): Record<string, string>;
export function getQualifiedEmojiSequencesMap(
	sequences: number[][],
	toString?: (value: number[]) => string
): Map<number[], number[]> | Record<string, string> {
	const convert = toString || getEmojiSequenceString;

	// Object without prototype, so keys cannot clash with inherited properties
	const results = Object.create(null) as Record<string, string>;

	for (const sequence of sequences) {
		const value = convert(sequence);
		const unqualified = convert(getUnqualifiedEmojiSequence(sequence));

		// Keep the longest value found for each unqualified sequence
		const current = results[unqualified];
		if (!current || current.length < value.length) {
			results[unqualified] = value;
		}
	}

	// Custom converter: return strings as is
	if (toString) {
		return results;
	}

	// Default converter: parse strings back to sequences
	const map = new Map<number[], number[]>();
	for (const key in results) {
		map.set(
			getEmojiSequenceFromString(key),
			getEmojiSequenceFromString(results[key])
		);
	}
	return map;
}
/**
 * Create regular expression instance
 *
 * Regular expression is created with the global flag, so it finds all
 * emojis in text, not only the first one.
 */
export function createEmojiRegExp(regexp: string): RegExp {
	return new RegExp(regexp, 'g');
}

/**
 * Match
 */
export interface EmojiRegexMatch {
	// Match to replace in text
	match: string;

	// Sequence
	sequence: number[];

	// Icon name
	keyword: string;
}

/**
 * Add prev/next
 */
interface PrevMatch {
	// Match
	match: EmojiRegexMatch;

	// Content between previous emoji and this emoji
	prev: string;
}

interface PrevNextMatch extends PrevMatch {
	// Content between this emoji and next emoji
	next: string;
}

/**
 * Find emojis in text
 *
 * Returns only one entry per match, longest matches first. Matches of the
 * same length are ordered with `localeCompare()`.
 *
 * @param regexp - Regular expression, as string or RegExp instance. Instance
 *   without the global flag is cloned with the global flag added, otherwise
 *   `String.match()` would return capture groups instead of all matches.
 * @param content - Text to search
 * @returns Unique matches. Sequence of each match does not include `FE0F`.
 */
export function getEmojiMatchesInText(
	regexp: string | RegExp,
	content: string
): EmojiRegexMatch[] {
	let pattern: RegExp;
	if (typeof regexp === 'string') {
		pattern = createEmojiRegExp(regexp);
	} else if (regexp.global) {
		pattern = regexp;
	} else {
		pattern = new RegExp(regexp.source, regexp.flags + 'g');
	}

	const found = content.match(pattern);
	if (!found) {
		return [];
	}

	// Remove duplicates before sorting, so result does not depend on equal
	// strings ending up next to each other. Empty matches are useless for
	// replacements, so they are removed too.
	const unique = Array.from(new Set(found)).filter(
		(item) => item.length > 0
	);

	// Sort matches by length to make sure longest matches get replaced first
	unique.sort((a, b) =>
		b.length === a.length ? a.localeCompare(b) : b.length - a.length
	);

	return unique.map((match) => {
		// Get sequence: iterate code points, not UTF-16 code units
		const sequence: number[] = [];
		for (const codePoint of match) {
			const num = codePoint.codePointAt(0) as number;
			if (num !== vs16Emoji) {
				sequence.push(num);
			}
		}

		// Get keyword
		const keyword = getEmojiSequenceString(
			convertEmojiSequenceToUTF32(sequence)
		);

		return {
			match,
			sequence,
			keyword,
		};
	});
}

/**
 * Sort emojis, get prev and next text
 *
 * Finds all occurrences of matches in content. Matches are checked in the
 * order of `matches`, and an occurrence that overlaps with a previously found
 * occurrence is ignored, so if `matches` has longest matches first, longest
 * emojis win. Occurrences that only touch each other do not overlap.
 *
 * @param content - Text that contains emojis
 * @param matches - Emojis to find in content
 * @returns Occurrences in the same order as in content, each with text
 *   between it and previous/next occurrence (or start/end of content).
 *   Empty array if nothing was found.
 */
export function sortEmojiMatchesInText(
	content: string,
	matches: EmojiRegexMatch[]
): PrevNextMatch[] {
	// Occurrence of emoji in content: `start` is inclusive, `end` is exclusive
	interface Range {
		match: EmojiRegexMatch;
		start: number;
		end: number;
	}
	const ranges: Range[] = [];

	// Check if [start, end) does not overlap with any known range
	const isFree = (start: number, end: number): boolean =>
		ranges.every((range) => start >= range.end || end <= range.start);

	for (const match of matches) {
		const search = match.match;
		if (!search) {
			// Empty string is found at every index: searching for it would
			// never move forward
			continue;
		}

		let start = content.indexOf(search);
		while (start !== -1) {
			const end = start + search.length;

			// Make sure it doesn't interfere with other replacements
			if (isFree(start, end)) {
				ranges.push({
					match,
					start,
					end,
				});
			}
			start = content.indexOf(search, end);
		}
	}

	if (!ranges.length) {
		// Empty list
		return [];
	}

	// Sort ranges in the same order as in content
	ranges.sort((a, b) => a.start - b.start);

	return ranges.map((range, index) => {
		const prevRange = index > 0 ? ranges[index - 1] : undefined;
		const nextRange =
			index + 1 < ranges.length ? ranges[index + 1] : undefined;
		return {
			match: range.match,
			prev: content.slice(prevRange ? prevRange.end : 0, range.start),
			next: content.slice(
				range.end,
				nextRange ? nextRange.start : undefined
			),
		};
	});
}
/**
 * Callback for replacing emoji in text
 *
 * Returns text to replace emoji with, undefined to skip replacement
 */
export type FindAndReplaceEmojisInTextCallback = (
	// Copy of match
	match: EmojiRegexMatch,
	// Resulting text generated so far: everything before this emoji, with
	// previous emojis already replaced
	prev: string
) => string | undefined;

/**
 * Find and replace emojis in text
 *
 * Callback is called for every emoji found in content, in the same order as
 * in content.
 *
 * Returns null if nothing was replaced
 */
export function findAndReplaceEmojisInText(
	regexp: string | RegExp,
	content: string,
	callback: FindAndReplaceEmojisInTextCallback
): string | null {
	const found = getEmojiMatchesInText(regexp, content);
	if (found.length === 0) {
		return null;
	}

	const ordered = sortEmojiMatchesInText(content, found);

	// Build new text from pieces: text before emoji, then emoji or replacement
	let output = '';
	let changed = false;
	for (const entry of ordered) {
		output += entry.prev;

		const replacement = callback({ ...entry.match }, output);
		if (replacement === undefined) {
			// Callback skipped this emoji: keep original text
			output += entry.match.match;
			continue;
		}

		output += replacement;
		changed = true;
	}

	// Text after last emoji
	output += ordered[ordered.length - 1].next;

	return changed ? output : null;
}
/**
 * Get unqualified sequence
 *
 * Returns copy of sequence without `FE0F`
 */
export function getUnqualifiedEmojiSequence(sequence: number[]): number[] {
	return sequence.filter((code) => code !== vs16Emoji);
}

/**
 * Get qualified sequence, adding optional `FE0F` wherever it might exist
 *
 * Sequence is split into parts, each part is checked separately:
 * - part that already contains `FE0F` is not changed
 * - in keycap part (2 numbers, second is keycap) `FE0F` is inserted between
 *   the two numbers
 * - to part that contains only 1 number, `FE0F` is appended
 * - other parts are not changed
 *
 * This might result in sequence that is not actually valid, but considering
 * that `FE0F` is always treated as optional, full sequence used in regex will
 * catch both qualified and unqualified emojis, so proper sequence will get
 * caught anyway. This function just makes sure that in case if sequence does
 * have `FE0F`, it will be caught by regex too.
 */
export function guessQualifiedEmojiSequence(sequence: number[]): number[] {
	const parts = splitEmojiSequences(sequence).map((part) => {
		if (part.includes(vs16Emoji)) {
			// Already has `FE0F`
			return part;
		}

		if (part.length === 2 && part[1] === keycapEmoji) {
			// Keycap
			return [part[0], vs16Emoji, part[1]];
		}

		if (part.length === 1) {
			// 1 character emoji
			return [part[0], vs16Emoji];
		}

		return part;
	});
	return joinEmojiSequences(parts);
}
/**
 * Add optional variations to emojis
 *
 * Each sequence is converted to UTF-32 and cleaned up with
 * `removeEmojiVariations()`. If the test data has an entry for that cleaned
 * up sequence, value from test data is used. Otherwise variations are guessed
 * with `guessQualifiedEmojiSequence()`.
 *
 * Result does not contain duplicate entries.
 *
 * @param sequences - Sequences to add variations to
 * @param testData - Sequences from parsed test file, optional
 * @param toString - Optional function to convert sequence to string. If set,
 *   function returns strings generated by it instead of sequences.
 */
export function addOptionalEmojiVariations(
	sequences: number[][],
	testData?: number[][]
): number[][];
export function addOptionalEmojiVariations(
	sequences: number[][],
	testData: number[][],
	toString: (value: number[]) => string
): string[];
export function addOptionalEmojiVariations(
	sequences: number[][],
	testData: number[][] = [],
	toString?: (value: number[]) => string
): number[][] | string[] {
	const convert = toString || getEmojiSequenceString;

	// Map test data: key is unqualified sequence, value is qualified sequence
	const testDataMap = getQualifiedEmojiSequencesMap(testData, convert);

	// Parse all sequences. Set is used to get rid of duplicate entries
	const set = new Set<string>();

	sequences.forEach((sequence) => {
		// Convert to UTF-32, remove variations
		const convertedSequence = convertEmojiSequenceToUTF32(sequence);
		const cleanSequence = removeEmojiVariations(convertedSequence);

		// Check test data
		const qualified = testDataMap[convert(cleanSequence)];
		if (qualified) {
			// Got item from test data
			set.add(qualified);
			return;
		}

		// Not in test data: guess variations
		set.add(convert(guessQualifiedEmojiSequence(cleanSequence)));
	});

	// Strings are parsed back to sequences only when default converter is used
	const results = Array.from(set);
	return toString ? results : results.map(getEmojiSequenceFromString);
}
@typescript-eslint/no-non-null-assertion */ -import { convertEmojiSequenceToUTF32 } from '../lib'; +import { convertEmojiSequenceToUTF32 } from '../lib/emoji/convert'; import { getEmojiSequenceFromString, joinEmojiSequences, diff --git a/packages/utils/tests/emoji-format-test.ts b/packages/utils/tests/emoji-format-test.ts index 0cab9e4..ad3b582 100644 --- a/packages/utils/tests/emoji-format-test.ts +++ b/packages/utils/tests/emoji-format-test.ts @@ -1,21 +1,20 @@ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { - getEmojiSequenceString, - emojiSequenceToKeyword, -} from '../lib/emoji/format'; +import { getEmojiSequenceString } from '../lib/emoji/format'; describe('Testing formatting emoji code points', () => { it('Empty sequence', () => { const sequence: number[] = []; expect(getEmojiSequenceString(sequence)).toBe(''); - expect(emojiSequenceToKeyword(sequence)).toBe(''); }); it('Keycap sequence', () => { const sequence: number[] = [0x23, 0xfe0f, 0x20e3]; expect(getEmojiSequenceString(sequence)).toBe('23-fe0f-20e3'); - expect(emojiSequenceToKeyword(sequence)).toBe('0023-fe0f-20e3'); + expect( + getEmojiSequenceString(sequence, { + add0: true, + }) + ).toBe('0023-fe0f-20e3'); }); it('UTF-16 sequence', () => { @@ -27,9 +26,6 @@ describe('Testing formatting emoji code points', () => { expect(getEmojiSequenceString(sequence)).toBe( '1f441-fe0f-200d-1f5e8-fe0f' ); - expect(emojiSequenceToKeyword(sequence)).toBe( - '1f441-fe0f-200d-1f5e8-fe0f' - ); }); it('UTF-32 sequence', () => { @@ -39,8 +35,5 @@ describe('Testing formatting emoji code points', () => { expect(getEmojiSequenceString(sequence)).toBe( '1f441-fe0f-200d-1f5e8-fe0f' ); - expect(emojiSequenceToKeyword(sequence)).toBe( - '1f441-fe0f-200d-1f5e8-fe0f' - ); }); }); diff --git a/packages/utils/tests/emoji-optional-variations-test.ts b/packages/utils/tests/emoji-optional-variations-test.ts index 4296fa6..85a871a 100644 --- a/packages/utils/tests/emoji-optional-variations-test.ts +++ 
b/packages/utils/tests/emoji-optional-variations-test.ts @@ -1,8 +1,10 @@ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ import { readFile, writeFile, unlink } from 'node:fs/promises'; import { getEmojiSequenceFromString } from '../lib/emoji/cleanup'; import { getEmojiSequenceString } from '../lib/emoji/format'; -import { parseEmojiTestFile } from '../lib/emoji/parse-test'; +import { + getQualifiedEmojiSequencesMap, + parseEmojiTestFile, +} from '../lib/emoji/parse-test'; import { addOptionalEmojiVariations } from '../lib/emoji/variations'; describe('Optional variations of emoji sequences', () => { @@ -88,6 +90,23 @@ describe('Optional variations of emoji sequences', () => { } const testData = parseEmojiTestFile(data); + // Make sure testData contains both fully-qualified and unqualified emojis + const testDataStrings = new Set( + testData.map((sequence) => getEmojiSequenceString(sequence)) + ); + expect(testDataStrings.has('1f600')).toBe(true); + expect(testDataStrings.has('263a')).toBe(true); + expect(testDataStrings.has('263a-fe0f')).toBe(true); + + // Test getQualifiedEmojiSequencesMap + const unqualifiedTest = getQualifiedEmojiSequencesMap( + testData, + getEmojiSequenceString + ); + expect(unqualifiedTest['1f600']).toBe('1f600'); + expect(unqualifiedTest['263a']).toBe('263a-fe0f'); + + // Sequences to test const sequences = [ // emoji without variation in test file '1F601', diff --git a/packages/utils/tests/emoji-regex-find-test.ts b/packages/utils/tests/emoji-regex-find-test.ts new file mode 100644 index 0000000..fb0a78b --- /dev/null +++ b/packages/utils/tests/emoji-regex-find-test.ts @@ -0,0 +1,107 @@ +import { createOptimisedRegex } from '../lib/emoji/regex/create'; +import { + getEmojiMatchesInText, + sortEmojiMatchesInText, +} from '../lib/emoji/replace/find'; + +describe('Finding emojis in text', () => { + it('Simple regex', () => { + const regexValue = createOptimisedRegex([ + '1F600', + '1F603', + '1F604', + '263A FE0F', + ]); + + 
const text1 = 'E1.0 grinning face: '; + const emoji1 = String.fromCodePoint(0x1f600); + const text2 = '\nE0.6 grinning face with big eyes: '; + const emoji2 = String.fromCodePoint(0x1f603); + const text3 = 'E1.0 grinning face: '; + const emoji3 = emoji1; + const text4 = 'E0.6 smiling face: '; + const emoji4 = '\u263A\uFE0F'; + const text5 = '(fully-qualified)\nE0.6 smiling face: '; + const emoji5 = '\u263A'; + const text6 = '(unqualified)'; + + const content = + text1 + + emoji1 + + text2 + + emoji2 + + text3 + + emoji3 + + text4 + + emoji4 + + text5 + + emoji5 + + text6; + const matches = getEmojiMatchesInText(regexValue, content); + + expect(matches).toEqual([ + { + match: '\u263A\uFE0F', + sequence: [0x263a], + keyword: '263a', + }, + { + // Should be returned only once + match: String.fromCodePoint(0x1f600), + sequence: [0x1f600], + keyword: '1f600', + }, + { + match: String.fromCodePoint(0x1f603), + sequence: [0x1f603], + keyword: '1f603', + }, + { + // Same as first, but without 'FE0F' + match: '\u263A', + sequence: [0x263a], + keyword: '263a', + }, + ]); + + const sortedMatches = sortEmojiMatchesInText(content, matches); + expect(sortedMatches).toEqual([ + // Same order as in content + { + match: emoji1, + sequence: [0x1f600], + keyword: '1f600', + prev: text1, + next: text2, + }, + { + match: emoji2, + sequence: [0x1f603], + keyword: '1f603', + prev: text2, + next: text3, + }, + { + match: emoji3, + sequence: [0x1f600], + keyword: '1f600', + prev: text3, + next: text4, + }, + { + match: emoji4, + sequence: [0x263a], + keyword: '263a', + prev: text4, + next: text5, + }, + { + match: emoji5, + sequence: [0x263a], + keyword: '263a', + prev: text5, + next: text6, + }, + ]); + }); +}); diff --git a/packages/utils/tests/emoji-regex-replace-test.ts b/packages/utils/tests/emoji-regex-replace-test.ts new file mode 100644 index 0000000..6be96ee --- /dev/null +++ b/packages/utils/tests/emoji-regex-replace-test.ts @@ -0,0 +1,82 @@ +import { createOptimisedRegex } from 
'../lib/emoji/regex/create'; +import { findAndReplaceEmojisInText } from '../lib/emoji/replace/replace'; + +describe('Replacing emojis in text', () => { + it('Simple and complex regex matches', () => { + const grinningCatEmoji = String.fromCodePoint(0x1f63a); + const alienEmoji = String.fromCodePoint(0x1f47d); + const testEmoji = + String.fromCodePoint(0x1f441) + + String.fromCodePoint(0xfe0f) + + String.fromCodePoint(0x200d) + + String.fromCodePoint(0x1f5e8); + + const sequence = [ + '1f63a', + '1f47d', + // 2 emojis that can be sequences of each other + '1F441 FE0F', + '1F441 FE0F 200D 1F5E8 FE0F', + '1F5E8 FE0F', + ]; + const regex = createOptimisedRegex(sequence); + + const text = + 'Grinning Cat: ' + + grinningCatEmoji + + ', aliens: ' + + alienEmoji + + alienEmoji + + alienEmoji + + ', Test: ' + + testEmoji + + 'end!'; + + // Counters + let grinningCatCalled = 0; + let alienCalled = 0; + let testCalled = 0; + const replaced = findAndReplaceEmojisInText( + regex, + text, + (match, prev) => { + switch (match.match) { + case grinningCatEmoji: { + expect(prev).toBe('Grinning Cat: '); + grinningCatCalled++; + return ':cat:'; + } + + case alienEmoji: { + if (alienCalled) { + expect(prev).toBe( + 'Grinning Cat: :cat:, aliens: ' + + ':alien:'.repeat(alienCalled) + ); + } + alienCalled++; + return ':alien:'; + } + + case testEmoji: { + testCalled++; + return ':test:'; + } + + default: { + throw new Error( + `Unexpected match: ${JSON.stringify(match)}` + ); + } + } + } + ); + + expect(grinningCatCalled).toBe(1); + expect(alienCalled).toBe(3); + expect(testCalled).toBe(1); + expect(replaced).toBe( + 'Grinning Cat: :cat:, aliens: :alien::alien::alien:, Test: :test:end!' + ); + }); +});