import { readFile, writeFile, unlink } from 'node:fs/promises';
import { parseEmojiTestFile } from '../lib/emoji/test/parse';
import { emojiVersion } from '../lib/emoji/data';
import {
	createOptimisedRegex,
	createOptimisedRegexForEmojiSequences,
} from '../lib/emoji/regex/create';
import {
	getEmojiMatchesInText,
	sortEmojiMatchesInText,
} from '../lib/emoji/replace/find';
import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations';
import { getEmojiSequenceString } from '../lib/emoji/format';

/**
 * Tests for locating emojis in text with generated regular expressions.
 *
 * The first three tests use small hand-written fixtures. The last test runs
 * against the Unicode `emoji-test.txt` file for `emojiVersion`, which is
 * downloaded once and cached in `tests/fixtures`.
 */
describe('Finding emojis in text', () => {
	/**
	 * Loads the Unicode emoji test file for `emojiVersion`.
	 *
	 * Reads the cached copy from `tests/fixtures` when it exists, otherwise
	 * downloads the file from unicode.org and writes it to the cache.
	 *
	 * @returns File content, or `undefined` when the file cannot be
	 * downloaded or does not contain the expected `# Version:` header (in
	 * the latter case the cached copy is removed).
	 */
	async function fetchEmojiTestData(): Promise<string | undefined> {
		// Fetch emojis, cache it
		const source = `tests/fixtures/download-emoji-${emojiVersion}.txt`;

		let data: string | undefined;
		try {
			data = await readFile(source, 'utf8');
		} catch {
			// Cache does not exist yet: download the file below
		}

		if (!data) {
			try {
				const response = await fetch(
					`https://unicode.org/Public/emoji/${emojiVersion}/emoji-test.txt`
				);
				if (!response.ok) {
					// Do not cache error pages
					return;
				}
				data = await response.text();
			} catch {
				// Network is not available: tests that need the data
				// report that they are skipped instead of failing
				return;
			}
			await writeFile(source, data, 'utf8');
		}

		// Test content, unlink cache on failure
		if (!data.includes(`# Version: ${emojiVersion}`)) {
			try {
				await unlink(source);
			} catch {
				// Nothing to clean up
			}
			return;
		}

		return data;
	}

	/**
	 * Splits a string into Unicode code points (not UTF-16 code units).
	 */
	function getCodePoints(text: string): number[] {
		const codePoints: number[] = [];
		// Iterating a string yields whole code points, including
		// characters outside of the Basic Multilingual Plane
		for (const char of text) {
			const codePoint = char.codePointAt(0);
			if (codePoint !== undefined) {
				codePoints.push(codePoint);
			}
		}
		return codePoints;
	}

	/**
	 * Formats a sequence for diagnostic output: the default string form
	 * followed by the upper-case UTF-16 escaped form in parentheses.
	 */
	function describeSequence(sequence: number[]): [string, string] {
		const escaped = getEmojiSequenceString(sequence, {
			format: 'utf-16',
			separator: '\\u',
			case: 'upper',
		});
		return [getEmojiSequenceString(sequence), `(\\u${escaped})`];
	}

	// Content of emoji-test.txt, undefined if it is not available
	let data: string | undefined;

	beforeAll(async () => {
		data = await fetchEmojiTestData();
	});

	// One regex, text contains a repeated emoji and the same emoji
	// with and without the 'FE0F' variation selector
	it('Simple regex', () => {
		const regexValue = createOptimisedRegex([
			'1F600',
			'1F603',
			'1F604',
			'263A FE0F',
		]);

		const text1 = 'E1.0 grinning face: ';
		const emoji1 = String.fromCodePoint(0x1f600);
		const text2 = '\nE0.6 grinning face with big eyes: ';
		const emoji2 = String.fromCodePoint(0x1f603);
		const text3 = 'E1.0 grinning face: ';
		const emoji3 = emoji1;
		const text4 = 'E0.6 smiling face: ';
		const emoji4 = '\u263A\uFE0F';
		const text5 = '(fully-qualified)\nE0.6 smiling face: ';
		const emoji5 = '\u263A';
		const text6 = '(unqualified)';

		const content =
			text1 +
			emoji1 +
			text2 +
			emoji2 +
			text3 +
			emoji3 +
			text4 +
			emoji4 +
			text5 +
			emoji5 +
			text6;

		// Each distinct matched string is expected exactly once
		const matches = getEmojiMatchesInText(regexValue, content);
		expect(matches).toEqual([
			{
				match: '\u263A\uFE0F',
				sequence: [0x263a],
				keyword: '263a',
				regexp: 0,
			},
			{
				// Should be returned only once
				match: String.fromCodePoint(0x1f600),
				sequence: [0x1f600],
				keyword: '1f600',
				regexp: 0,
			},
			{
				match: String.fromCodePoint(0x1f603),
				sequence: [0x1f603],
				keyword: '1f603',
				regexp: 0,
			},
			{
				// Same as first, but without 'FE0F'
				match: '\u263A',
				sequence: [0x263a],
				keyword: '263a',
				regexp: 0,
			},
		]);

		const sortedMatches = sortEmojiMatchesInText(content, matches);
		expect(sortedMatches).toEqual([
			// Same order as in content
			{
				match: {
					match: emoji1,
					sequence: [0x1f600],
					keyword: '1f600',
					regexp: 0,
				},
				prev: text1,
				next: text2,
			},
			{
				match: {
					match: emoji2,
					sequence: [0x1f603],
					keyword: '1f603',
					regexp: 0,
				},
				prev: text2,
				next: text3,
			},
			{
				match: {
					match: emoji3,
					sequence: [0x1f600],
					keyword: '1f600',
					regexp: 0,
				},
				prev: text3,
				next: text4,
			},
			{
				match: {
					match: emoji4,
					sequence: [0x263a],
					keyword: '263a',
					regexp: 0,
				},
				prev: text4,
				next: text5,
			},
			{
				match: {
					match: emoji5,
					sequence: [0x263a],
					keyword: '263a',
					regexp: 0,
				},
				prev: text5,
				next: text6,
			},
		]);
	});

	// Same text as above, but emojis are split between two regular
	// expressions: 'regexp' is the index of the expression that matched
	it('Multiple regex', () => {
		const regex0 = createOptimisedRegex(['1F600', '1F603', '1F604']);
		const regex1 = createOptimisedRegex(['263A FE0F']);

		const text1 = 'E1.0 grinning face: ';
		const emoji1 = String.fromCodePoint(0x1f600);
		const text2 = '\nE0.6 grinning face with big eyes: ';
		const emoji2 = String.fromCodePoint(0x1f603);
		const text3 = 'E1.0 grinning face: ';
		const emoji3 = emoji1;
		const text4 = 'E0.6 smiling face: ';
		const emoji4 = '\u263A\uFE0F';
		const text5 = '(fully-qualified)\nE0.6 smiling face: ';
		const emoji5 = '\u263A';
		const text6 = '(unqualified)';

		const content =
			text1 +
			emoji1 +
			text2 +
			emoji2 +
			text3 +
			emoji3 +
			text4 +
			emoji4 +
			text5 +
			emoji5 +
			text6;

		const matches = getEmojiMatchesInText([regex0, regex1], content);
		expect(matches).toEqual([
			{
				match: '\u263A\uFE0F',
				sequence: [0x263a],
				keyword: '263a',
				regexp: 1,
			},
			{
				// Should be returned only once
				match: String.fromCodePoint(0x1f600),
				sequence: [0x1f600],
				keyword: '1f600',
				regexp: 0,
			},
			{
				match: String.fromCodePoint(0x1f603),
				sequence: [0x1f603],
				keyword: '1f603',
				regexp: 0,
			},
			{
				// Same as first, but without 'FE0F'
				match: '\u263A',
				sequence: [0x263a],
				keyword: '263a',
				regexp: 1,
			},
		]);

		const sortedMatches = sortEmojiMatchesInText(content, matches);
		expect(sortedMatches).toEqual([
			// Same order as in content
			{
				match: {
					match: emoji1,
					sequence: [0x1f600],
					keyword: '1f600',
					regexp: 0,
				},
				prev: text1,
				next: text2,
			},
			{
				match: {
					match: emoji2,
					sequence: [0x1f603],
					keyword: '1f603',
					regexp: 0,
				},
				prev: text2,
				next: text3,
			},
			{
				match: {
					match: emoji3,
					sequence: [0x1f600],
					keyword: '1f600',
					regexp: 0,
				},
				prev: text3,
				next: text4,
			},
			{
				match: {
					match: emoji4,
					sequence: [0x263a],
					keyword: '263a',
					regexp: 1,
				},
				prev: text4,
				next: text5,
			},
			{
				match: {
					match: emoji5,
					sequence: [0x263a],
					keyword: '263a',
					regexp: 1,
				},
				prev: text5,
				next: text6,
			},
		]);
	});

	// Emojis follow each other without any text in between
	it('Sequences without spaces', () => {
		const regex = createOptimisedRegex(['1F63A', '1F638', '1F639']);

		const emoji1 = String.fromCodePoint(0x1f63a);
		const emoji2 = String.fromCodePoint(0x1f638);
		const emoji3 = String.fromCodePoint(0x1f639);

		const content = emoji1 + emoji2 + emoji3 + emoji1 + emoji2;

		const matches = getEmojiMatchesInText(regex, content);
		expect(matches).toEqual([
			{
				match: '\uD83D\uDE38',
				sequence: [0x1f638],
				keyword: '1f638',
				regexp: 0,
			},
			{
				match: '\uD83D\uDE39',
				sequence: [0x1f639],
				keyword: '1f639',
				regexp: 0,
			},
			{
				match: '\uD83D\uDE3A',
				sequence: [0x1f63a],
				keyword: '1f63a',
				regexp: 0,
			},
		]);

		const sortedMatches = sortEmojiMatchesInText(content, matches);
		expect(sortedMatches).toEqual([
			// Same order as in content
			{
				match: {
					match: '\uD83D\uDE3A',
					sequence: [0x1f63a],
					keyword: '1f63a',
					regexp: 0,
				},
				prev: '',
				next: '',
			},
			{
				match: {
					match: '\uD83D\uDE38',
					sequence: [0x1f638],
					keyword: '1f638',
					regexp: 0,
				},
				prev: '',
				next: '',
			},
			{
				match: {
					match: '\uD83D\uDE39',
					sequence: [0x1f639],
					keyword: '1f639',
					regexp: 0,
				},
				prev: '',
				next: '',
			},
			{
				match: {
					match: '\uD83D\uDE3A',
					sequence: [0x1f63a],
					keyword: '1f63a',
					regexp: 0,
				},
				prev: '',
				next: '',
			},
			{
				match: {
					match: '\uD83D\uDE38',
					sequence: [0x1f638],
					keyword: '1f638',
					regexp: 0,
				},
				prev: '',
				next: '',
			},
		]);
	});

	// Every sequence from emoji-test.txt, converted to text, must be found
	// as exactly one match that contains the whole sequence
	it('Finding all test emojis', () => {
		if (!data) {
			console.warn('Test skipped: test data is not available');
			return;
		}

		// Parse test data
		const testData = parseEmojiTestFile(data);
		const sequences = Object.values(testData).map(({ sequence }) => {
			return {
				sequence,
			};
		});

		// Get all icons
		const iconsList = getQualifiedEmojiVariations(sequences);

		// Get regex
		const regexValue = createOptimisedRegexForEmojiSequences(
			iconsList.map((item) => item.sequence)
		);
		const regex = new RegExp(regexValue, 'g');

		for (const { sequence } of sequences) {
			const text = sequence
				.map((code) => String.fromCodePoint(code))
				.join('');

			// Test finding match
			const result = getEmojiMatchesInText(regex, text);

			// Must have only 1 item: log details before failing
			if (result.length !== 1) {
				console.log(...describeSequence(sequence), text);
				for (const match of result) {
					console.log(
						...describeSequence(getCodePoints(match.match))
					);
				}
				console.log(result);
			}
			expect(result.length).toBe(1);

			const firstMatch = result[0];
			const resultSequence = getCodePoints(firstMatch.match);

			// Log details if only a part of the sequence was matched
			if (resultSequence.length !== sequence.length) {
				console.log(...describeSequence(sequence), result);
			}

			expect(resultSequence).toEqual(sequence);
		}
	});
});