2
0
mirror of https://github.com/iconify/iconify.git synced 2024-12-22 09:48:54 +00:00
iconify/packages/utils/tests/emoji-regex-find-test.ts
2024-09-14 07:53:24 +03:00

460 lines
9.2 KiB
TypeScript

import { readFile, writeFile, unlink } from 'node:fs/promises';
import { parseEmojiTestFile } from '../lib/emoji/test/parse';
import { emojiVersion } from '../lib/emoji/data';
import {
createOptimisedRegex,
createOptimisedRegexForEmojiSequences,
} from '../lib/emoji/regex/create';
import {
getEmojiMatchesInText,
sortEmojiMatchesInText,
} from '../lib/emoji/replace/find';
import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations';
import { getEmojiSequenceString } from '../lib/emoji/format';
describe('Finding emojis in text', () => {
async function fetchEmojiTestData(): Promise<string | undefined> {
// Fetch emojis, cache it
const source = `tests/fixtures/download-emoji-${emojiVersion}.txt`;
let data: string | undefined;
try {
data = await readFile(source, 'utf8');
} catch {
//
}
if (!data) {
data = (
await (
await fetch(
`https://unicode.org/Public/emoji/${emojiVersion}/emoji-test.txt`
)
).text()
).toString();
await writeFile(source, data, 'utf8');
}
// Test content, unlink cache on failure
if (data.indexOf(`# Version: ${emojiVersion}`) === -1) {
try {
await unlink(source);
} catch {
//
}
return;
}
return data;
}
let data: string | undefined;
beforeAll(async () => {
data = await fetchEmojiTestData();
});
it('Simple regex', () => {
const regexValue = createOptimisedRegex([
'1F600',
'1F603',
'1F604',
'263A FE0F',
]);
const text1 = 'E1.0 grinning face: ';
const emoji1 = String.fromCodePoint(0x1f600);
const text2 = '\nE0.6 grinning face with big eyes: ';
const emoji2 = String.fromCodePoint(0x1f603);
const text3 = 'E1.0 grinning face: ';
const emoji3 = emoji1;
const text4 = 'E0.6 smiling face: ';
const emoji4 = '\u263A\uFE0F';
const text5 = '(fully-qualified)\nE0.6 smiling face: ';
const emoji5 = '\u263A';
const text6 = '(unqualified)';
const content =
text1 +
emoji1 +
text2 +
emoji2 +
text3 +
emoji3 +
text4 +
emoji4 +
text5 +
emoji5 +
text6;
const matches = getEmojiMatchesInText(regexValue, content);
expect(matches).toEqual([
{
match: '\u263A\uFE0F',
sequence: [0x263a],
keyword: '263a',
regexp: 0,
},
{
// Should be returned only once
match: String.fromCodePoint(0x1f600),
sequence: [0x1f600],
keyword: '1f600',
regexp: 0,
},
{
match: String.fromCodePoint(0x1f603),
sequence: [0x1f603],
keyword: '1f603',
regexp: 0,
},
{
// Same as first, but without 'FE0F'
match: '\u263A',
sequence: [0x263a],
keyword: '263a',
regexp: 0,
},
]);
const sortedMatches = sortEmojiMatchesInText(content, matches);
expect(sortedMatches).toEqual([
// Same order as in content
{
match: {
match: emoji1,
sequence: [0x1f600],
keyword: '1f600',
regexp: 0,
},
prev: text1,
next: text2,
},
{
match: {
match: emoji2,
sequence: [0x1f603],
keyword: '1f603',
regexp: 0,
},
prev: text2,
next: text3,
},
{
match: {
match: emoji3,
sequence: [0x1f600],
keyword: '1f600',
regexp: 0,
},
prev: text3,
next: text4,
},
{
match: {
match: emoji4,
sequence: [0x263a],
keyword: '263a',
regexp: 0,
},
prev: text4,
next: text5,
},
{
match: {
match: emoji5,
sequence: [0x263a],
keyword: '263a',
regexp: 0,
},
prev: text5,
next: text6,
},
]);
});
it('Multiple regex', () => {
const regex0 = createOptimisedRegex(['1F600', '1F603', '1F604']);
const regex1 = createOptimisedRegex(['263A FE0F']);
const text1 = 'E1.0 grinning face: ';
const emoji1 = String.fromCodePoint(0x1f600);
const text2 = '\nE0.6 grinning face with big eyes: ';
const emoji2 = String.fromCodePoint(0x1f603);
const text3 = 'E1.0 grinning face: ';
const emoji3 = emoji1;
const text4 = 'E0.6 smiling face: ';
const emoji4 = '\u263A\uFE0F';
const text5 = '(fully-qualified)\nE0.6 smiling face: ';
const emoji5 = '\u263A';
const text6 = '(unqualified)';
const content =
text1 +
emoji1 +
text2 +
emoji2 +
text3 +
emoji3 +
text4 +
emoji4 +
text5 +
emoji5 +
text6;
const matches = getEmojiMatchesInText([regex0, regex1], content);
expect(matches).toEqual([
{
match: '\u263A\uFE0F',
sequence: [0x263a],
keyword: '263a',
regexp: 1,
},
{
// Should be returned only once
match: String.fromCodePoint(0x1f600),
sequence: [0x1f600],
keyword: '1f600',
regexp: 0,
},
{
match: String.fromCodePoint(0x1f603),
sequence: [0x1f603],
keyword: '1f603',
regexp: 0,
},
{
// Same as first, but without 'FE0F'
match: '\u263A',
sequence: [0x263a],
keyword: '263a',
regexp: 1,
},
]);
const sortedMatches = sortEmojiMatchesInText(content, matches);
expect(sortedMatches).toEqual([
// Same order as in content
{
match: {
match: emoji1,
sequence: [0x1f600],
keyword: '1f600',
regexp: 0,
},
prev: text1,
next: text2,
},
{
match: {
match: emoji2,
sequence: [0x1f603],
keyword: '1f603',
regexp: 0,
},
prev: text2,
next: text3,
},
{
match: {
match: emoji3,
sequence: [0x1f600],
keyword: '1f600',
regexp: 0,
},
prev: text3,
next: text4,
},
{
match: {
match: emoji4,
sequence: [0x263a],
keyword: '263a',
regexp: 1,
},
prev: text4,
next: text5,
},
{
match: {
match: emoji5,
sequence: [0x263a],
keyword: '263a',
regexp: 1,
},
prev: text5,
next: text6,
},
]);
});
it('Sequences without spaces', () => {
const regex = createOptimisedRegex(['1F63A', '1F638', '1F639']);
const emoji1 = String.fromCodePoint(0x1f63a);
const emoji2 = String.fromCodePoint(0x1f638);
const emoji3 = String.fromCodePoint(0x1f639);
const content = emoji1 + emoji2 + emoji3 + emoji1 + emoji2;
const matches = getEmojiMatchesInText(regex, content);
expect(matches).toEqual([
{
match: '\uD83D\uDE38',
sequence: [0x1f638],
keyword: '1f638',
regexp: 0,
},
{
match: '\uD83D\uDE39',
sequence: [0x1f639],
keyword: '1f639',
regexp: 0,
},
{
match: '\uD83D\uDE3A',
sequence: [0x1f63a],
keyword: '1f63a',
regexp: 0,
},
]);
const sortedMatches = sortEmojiMatchesInText(content, matches);
expect(sortedMatches).toEqual([
// Same order as in content
{
match: {
match: '\uD83D\uDE3A',
sequence: [0x1f63a],
keyword: '1f63a',
regexp: 0,
},
prev: '',
next: '',
},
{
match: {
match: '\uD83D\uDE38',
sequence: [0x1f638],
keyword: '1f638',
regexp: 0,
},
prev: '',
next: '',
},
{
match: {
match: '\uD83D\uDE39',
sequence: [0x1f639],
keyword: '1f639',
regexp: 0,
},
prev: '',
next: '',
},
{
match: {
match: '\uD83D\uDE3A',
sequence: [0x1f63a],
keyword: '1f63a',
regexp: 0,
},
prev: '',
next: '',
},
{
match: {
match: '\uD83D\uDE38',
sequence: [0x1f638],
keyword: '1f638',
regexp: 0,
},
prev: '',
next: '',
},
]);
});
it('Finding all test emojis', () => {
if (!data) {
console.warn('Test skipped: test data is not available');
return;
}
// Parse test data
const testData = parseEmojiTestFile(data);
const sequences = Object.values(testData).map(({ sequence }) => {
return {
sequence,
};
});
// Get all icons
const iconsList = getQualifiedEmojiVariations(sequences);
// Get regex
const regexValue = createOptimisedRegexForEmojiSequences(
iconsList.map((item) => item.sequence)
);
const regex = new RegExp(regexValue, 'g');
sequences.forEach((sequence) => {
const text = sequence.sequence
.map((code) => String.fromCodePoint(code))
.join('');
// Test finding match
const result = getEmojiMatchesInText(regex, text);
// Must have only 1 item
if (result.length !== 1) {
console.log(
getEmojiSequenceString(sequence.sequence),
`(\\u${getEmojiSequenceString(sequence.sequence, {
format: 'utf-16',
separator: '\\u',
case: 'upper',
})})`,
text
);
result.forEach((match) => {
const sequence: number[] = [];
for (const codePoint of match.match) {
const num = codePoint.codePointAt(0) as number;
sequence.push(num);
}
console.log(
getEmojiSequenceString(sequence),
`(\\u${getEmojiSequenceString(sequence, {
format: 'utf-16',
separator: '\\u',
case: 'upper',
})})`
);
});
console.log(result);
expect(result.length).toBe(1);
}
const firstMatch = result[0];
const resultSequence = [];
for (const codePoint of firstMatch.match) {
const num = codePoint.codePointAt(0) as number;
resultSequence.push(num);
}
if (resultSequence.length !== sequence.sequence.length) {
console.log(
getEmojiSequenceString(sequence.sequence),
`(\\u${getEmojiSequenceString(sequence.sequence, {
format: 'utf-16',
separator: '\\u',
case: 'upper',
})})`,
result
);
}
expect(resultSequence).toEqual(sequence.sequence);
});
});
});