2023-01-01 15:40:07 +00:00
|
|
|
import { readFile, writeFile, unlink } from 'node:fs/promises';
|
|
|
|
import { parseEmojiTestFile } from '../lib/emoji/test/parse';
|
|
|
|
import { emojiVersion } from '../lib/emoji/data';
|
|
|
|
import {
|
|
|
|
createOptimisedRegex,
|
|
|
|
createOptimisedRegexForEmojiSequences,
|
|
|
|
} from '../lib/emoji/regex/create';
|
2022-12-14 18:49:50 +00:00
|
|
|
import {
|
|
|
|
getEmojiMatchesInText,
|
|
|
|
sortEmojiMatchesInText,
|
|
|
|
} from '../lib/emoji/replace/find';
|
2023-01-01 15:40:07 +00:00
|
|
|
import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations';
|
|
|
|
import { getEmojiSequenceString } from '../lib/emoji/format';
|
2022-12-14 18:49:50 +00:00
|
|
|
|
|
|
|
describe('Finding emojis in text', () => {
|
2023-01-01 15:40:07 +00:00
|
|
|
async function fetchEmojiTestData(): Promise<string | undefined> {
|
|
|
|
// Fetch emojis, cache it
|
|
|
|
const source = `tests/fixtures/download-emoji-${emojiVersion}.txt`;
|
|
|
|
|
|
|
|
let data: string | undefined;
|
|
|
|
try {
|
|
|
|
data = await readFile(source, 'utf8');
|
|
|
|
} catch {
|
|
|
|
//
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!data) {
|
|
|
|
data = (
|
2024-09-14 04:53:24 +00:00
|
|
|
await (
|
|
|
|
await fetch(
|
|
|
|
`https://unicode.org/Public/emoji/${emojiVersion}/emoji-test.txt`
|
|
|
|
)
|
|
|
|
).text()
|
|
|
|
).toString();
|
2023-01-01 15:40:07 +00:00
|
|
|
await writeFile(source, data, 'utf8');
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test content, unlink cache on failure
|
|
|
|
if (data.indexOf(`# Version: ${emojiVersion}`) === -1) {
|
|
|
|
try {
|
|
|
|
await unlink(source);
|
|
|
|
} catch {
|
|
|
|
//
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
return data;
|
|
|
|
}
|
|
|
|
|
|
|
|
let data: string | undefined;
|
|
|
|
|
|
|
|
beforeAll(async () => {
|
|
|
|
data = await fetchEmojiTestData();
|
|
|
|
});
|
|
|
|
|
2022-12-14 18:49:50 +00:00
|
|
|
it('Simple regex', () => {
|
|
|
|
const regexValue = createOptimisedRegex([
|
|
|
|
'1F600',
|
|
|
|
'1F603',
|
|
|
|
'1F604',
|
|
|
|
'263A FE0F',
|
|
|
|
]);
|
|
|
|
|
|
|
|
const text1 = 'E1.0 grinning face: ';
|
|
|
|
const emoji1 = String.fromCodePoint(0x1f600);
|
|
|
|
const text2 = '\nE0.6 grinning face with big eyes: ';
|
|
|
|
const emoji2 = String.fromCodePoint(0x1f603);
|
|
|
|
const text3 = 'E1.0 grinning face: ';
|
|
|
|
const emoji3 = emoji1;
|
|
|
|
const text4 = 'E0.6 smiling face: ';
|
|
|
|
const emoji4 = '\u263A\uFE0F';
|
|
|
|
const text5 = '(fully-qualified)\nE0.6 smiling face: ';
|
|
|
|
const emoji5 = '\u263A';
|
|
|
|
const text6 = '(unqualified)';
|
|
|
|
|
|
|
|
const content =
|
|
|
|
text1 +
|
|
|
|
emoji1 +
|
|
|
|
text2 +
|
|
|
|
emoji2 +
|
|
|
|
text3 +
|
|
|
|
emoji3 +
|
|
|
|
text4 +
|
|
|
|
emoji4 +
|
|
|
|
text5 +
|
|
|
|
emoji5 +
|
|
|
|
text6;
|
|
|
|
const matches = getEmojiMatchesInText(regexValue, content);
|
|
|
|
|
|
|
|
expect(matches).toEqual([
|
|
|
|
{
|
|
|
|
match: '\u263A\uFE0F',
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 18:49:50 +00:00
|
|
|
},
|
|
|
|
{
|
|
|
|
// Should be returned only once
|
|
|
|
match: String.fromCodePoint(0x1f600),
|
|
|
|
sequence: [0x1f600],
|
|
|
|
keyword: '1f600',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 18:49:50 +00:00
|
|
|
},
|
|
|
|
{
|
|
|
|
match: String.fromCodePoint(0x1f603),
|
|
|
|
sequence: [0x1f603],
|
|
|
|
keyword: '1f603',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 18:49:50 +00:00
|
|
|
},
|
|
|
|
{
|
|
|
|
// Same as first, but without 'FE0F'
|
|
|
|
match: '\u263A',
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 18:49:50 +00:00
|
|
|
},
|
|
|
|
]);
|
|
|
|
|
|
|
|
const sortedMatches = sortEmojiMatchesInText(content, matches);
|
|
|
|
expect(sortedMatches).toEqual([
|
|
|
|
// Same order as in content
|
|
|
|
{
|
2022-12-14 21:37:22 +00:00
|
|
|
match: {
|
|
|
|
match: emoji1,
|
|
|
|
sequence: [0x1f600],
|
|
|
|
keyword: '1f600',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 21:37:22 +00:00
|
|
|
},
|
2022-12-14 18:49:50 +00:00
|
|
|
prev: text1,
|
|
|
|
next: text2,
|
|
|
|
},
|
|
|
|
{
|
2022-12-14 21:37:22 +00:00
|
|
|
match: {
|
|
|
|
match: emoji2,
|
|
|
|
sequence: [0x1f603],
|
|
|
|
keyword: '1f603',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 21:37:22 +00:00
|
|
|
},
|
2022-12-14 18:49:50 +00:00
|
|
|
prev: text2,
|
|
|
|
next: text3,
|
|
|
|
},
|
|
|
|
{
|
2022-12-14 21:37:22 +00:00
|
|
|
match: {
|
|
|
|
match: emoji3,
|
|
|
|
sequence: [0x1f600],
|
|
|
|
keyword: '1f600',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 21:37:22 +00:00
|
|
|
},
|
2022-12-14 18:49:50 +00:00
|
|
|
prev: text3,
|
|
|
|
next: text4,
|
|
|
|
},
|
|
|
|
{
|
2022-12-14 21:37:22 +00:00
|
|
|
match: {
|
|
|
|
match: emoji4,
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
2022-12-14 21:37:22 +00:00
|
|
|
},
|
2022-12-14 18:49:50 +00:00
|
|
|
prev: text4,
|
|
|
|
next: text5,
|
|
|
|
},
|
|
|
|
{
|
2022-12-14 21:37:22 +00:00
|
|
|
match: {
|
|
|
|
match: emoji5,
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
2022-12-25 09:15:28 +00:00
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: text5,
|
|
|
|
next: text6,
|
|
|
|
},
|
|
|
|
]);
|
|
|
|
});
|
|
|
|
|
|
|
|
it('Multiple regex', () => {
|
|
|
|
const regex0 = createOptimisedRegex(['1F600', '1F603', '1F604']);
|
|
|
|
const regex1 = createOptimisedRegex(['263A FE0F']);
|
|
|
|
|
|
|
|
const text1 = 'E1.0 grinning face: ';
|
|
|
|
const emoji1 = String.fromCodePoint(0x1f600);
|
|
|
|
const text2 = '\nE0.6 grinning face with big eyes: ';
|
|
|
|
const emoji2 = String.fromCodePoint(0x1f603);
|
|
|
|
const text3 = 'E1.0 grinning face: ';
|
|
|
|
const emoji3 = emoji1;
|
|
|
|
const text4 = 'E0.6 smiling face: ';
|
|
|
|
const emoji4 = '\u263A\uFE0F';
|
|
|
|
const text5 = '(fully-qualified)\nE0.6 smiling face: ';
|
|
|
|
const emoji5 = '\u263A';
|
|
|
|
const text6 = '(unqualified)';
|
|
|
|
|
|
|
|
const content =
|
|
|
|
text1 +
|
|
|
|
emoji1 +
|
|
|
|
text2 +
|
|
|
|
emoji2 +
|
|
|
|
text3 +
|
|
|
|
emoji3 +
|
|
|
|
text4 +
|
|
|
|
emoji4 +
|
|
|
|
text5 +
|
|
|
|
emoji5 +
|
|
|
|
text6;
|
|
|
|
const matches = getEmojiMatchesInText([regex0, regex1], content);
|
|
|
|
|
|
|
|
expect(matches).toEqual([
|
|
|
|
{
|
|
|
|
match: '\u263A\uFE0F',
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
|
|
|
regexp: 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
// Should be returned only once
|
|
|
|
match: String.fromCodePoint(0x1f600),
|
|
|
|
sequence: [0x1f600],
|
|
|
|
keyword: '1f600',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: String.fromCodePoint(0x1f603),
|
|
|
|
sequence: [0x1f603],
|
|
|
|
keyword: '1f603',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
// Same as first, but without 'FE0F'
|
|
|
|
match: '\u263A',
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
|
|
|
regexp: 1,
|
|
|
|
},
|
|
|
|
]);
|
|
|
|
|
|
|
|
const sortedMatches = sortEmojiMatchesInText(content, matches);
|
|
|
|
expect(sortedMatches).toEqual([
|
|
|
|
// Same order as in content
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: emoji1,
|
|
|
|
sequence: [0x1f600],
|
|
|
|
keyword: '1f600',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: text1,
|
|
|
|
next: text2,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: emoji2,
|
|
|
|
sequence: [0x1f603],
|
|
|
|
keyword: '1f603',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: text2,
|
|
|
|
next: text3,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: emoji3,
|
|
|
|
sequence: [0x1f600],
|
|
|
|
keyword: '1f600',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: text3,
|
|
|
|
next: text4,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: emoji4,
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
|
|
|
regexp: 1,
|
|
|
|
},
|
|
|
|
prev: text4,
|
|
|
|
next: text5,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: emoji5,
|
|
|
|
sequence: [0x263a],
|
|
|
|
keyword: '263a',
|
|
|
|
regexp: 1,
|
2022-12-14 21:37:22 +00:00
|
|
|
},
|
2022-12-14 18:49:50 +00:00
|
|
|
prev: text5,
|
|
|
|
next: text6,
|
|
|
|
},
|
|
|
|
]);
|
|
|
|
});
|
2022-12-27 21:27:05 +00:00
|
|
|
|
|
|
|
it('Sequences without spaces', () => {
|
|
|
|
const regex = createOptimisedRegex(['1F63A', '1F638', '1F639']);
|
|
|
|
|
|
|
|
const emoji1 = String.fromCodePoint(0x1f63a);
|
|
|
|
const emoji2 = String.fromCodePoint(0x1f638);
|
|
|
|
const emoji3 = String.fromCodePoint(0x1f639);
|
|
|
|
|
|
|
|
const content = emoji1 + emoji2 + emoji3 + emoji1 + emoji2;
|
|
|
|
const matches = getEmojiMatchesInText(regex, content);
|
|
|
|
|
|
|
|
expect(matches).toEqual([
|
|
|
|
{
|
|
|
|
match: '\uD83D\uDE38',
|
|
|
|
sequence: [0x1f638],
|
|
|
|
keyword: '1f638',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: '\uD83D\uDE39',
|
|
|
|
sequence: [0x1f639],
|
|
|
|
keyword: '1f639',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: '\uD83D\uDE3A',
|
|
|
|
sequence: [0x1f63a],
|
|
|
|
keyword: '1f63a',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
]);
|
|
|
|
|
|
|
|
const sortedMatches = sortEmojiMatchesInText(content, matches);
|
|
|
|
expect(sortedMatches).toEqual([
|
|
|
|
// Same order as in content
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: '\uD83D\uDE3A',
|
|
|
|
sequence: [0x1f63a],
|
|
|
|
keyword: '1f63a',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: '',
|
|
|
|
next: '',
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: '\uD83D\uDE38',
|
|
|
|
sequence: [0x1f638],
|
|
|
|
keyword: '1f638',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: '',
|
|
|
|
next: '',
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: '\uD83D\uDE39',
|
|
|
|
sequence: [0x1f639],
|
|
|
|
keyword: '1f639',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: '',
|
|
|
|
next: '',
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: '\uD83D\uDE3A',
|
|
|
|
sequence: [0x1f63a],
|
|
|
|
keyword: '1f63a',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: '',
|
|
|
|
next: '',
|
|
|
|
},
|
|
|
|
{
|
|
|
|
match: {
|
|
|
|
match: '\uD83D\uDE38',
|
|
|
|
sequence: [0x1f638],
|
|
|
|
keyword: '1f638',
|
|
|
|
regexp: 0,
|
|
|
|
},
|
|
|
|
prev: '',
|
|
|
|
next: '',
|
|
|
|
},
|
|
|
|
]);
|
|
|
|
});
|
2023-01-01 15:40:07 +00:00
|
|
|
|
|
|
|
it('Finding all test emojis', () => {
|
|
|
|
if (!data) {
|
|
|
|
console.warn('Test skipped: test data is not available');
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse test data
|
|
|
|
const testData = parseEmojiTestFile(data);
|
|
|
|
const sequences = Object.values(testData).map(({ sequence }) => {
|
|
|
|
return {
|
|
|
|
sequence,
|
|
|
|
};
|
|
|
|
});
|
|
|
|
|
|
|
|
// Get all icons
|
2023-01-01 18:59:37 +00:00
|
|
|
const iconsList = getQualifiedEmojiVariations(sequences);
|
2023-01-01 15:40:07 +00:00
|
|
|
|
|
|
|
// Get regex
|
|
|
|
const regexValue = createOptimisedRegexForEmojiSequences(
|
|
|
|
iconsList.map((item) => item.sequence)
|
|
|
|
);
|
|
|
|
const regex = new RegExp(regexValue, 'g');
|
|
|
|
|
|
|
|
sequences.forEach((sequence) => {
|
|
|
|
const text = sequence.sequence
|
|
|
|
.map((code) => String.fromCodePoint(code))
|
|
|
|
.join('');
|
|
|
|
|
|
|
|
// Test finding match
|
|
|
|
const result = getEmojiMatchesInText(regex, text);
|
|
|
|
|
|
|
|
// Must have only 1 item
|
|
|
|
if (result.length !== 1) {
|
|
|
|
console.log(
|
|
|
|
getEmojiSequenceString(sequence.sequence),
|
|
|
|
`(\\u${getEmojiSequenceString(sequence.sequence, {
|
|
|
|
format: 'utf-16',
|
|
|
|
separator: '\\u',
|
|
|
|
case: 'upper',
|
|
|
|
})})`,
|
|
|
|
text
|
|
|
|
);
|
|
|
|
result.forEach((match) => {
|
|
|
|
const sequence: number[] = [];
|
|
|
|
for (const codePoint of match.match) {
|
|
|
|
const num = codePoint.codePointAt(0) as number;
|
|
|
|
sequence.push(num);
|
|
|
|
}
|
|
|
|
console.log(
|
|
|
|
getEmojiSequenceString(sequence),
|
|
|
|
`(\\u${getEmojiSequenceString(sequence, {
|
|
|
|
format: 'utf-16',
|
|
|
|
separator: '\\u',
|
|
|
|
case: 'upper',
|
|
|
|
})})`
|
|
|
|
);
|
|
|
|
});
|
|
|
|
console.log(result);
|
|
|
|
expect(result.length).toBe(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
const firstMatch = result[0];
|
|
|
|
const resultSequence = [];
|
|
|
|
for (const codePoint of firstMatch.match) {
|
|
|
|
const num = codePoint.codePointAt(0) as number;
|
|
|
|
resultSequence.push(num);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (resultSequence.length !== sequence.sequence.length) {
|
|
|
|
console.log(
|
|
|
|
getEmojiSequenceString(sequence.sequence),
|
|
|
|
`(\\u${getEmojiSequenceString(sequence.sequence, {
|
|
|
|
format: 'utf-16',
|
|
|
|
separator: '\\u',
|
|
|
|
case: 'upper',
|
|
|
|
})})`,
|
|
|
|
result
|
|
|
|
);
|
|
|
|
}
|
|
|
|
expect(resultSequence).toEqual(sequence.sequence);
|
|
|
|
});
|
|
|
|
});
|
2022-12-14 18:49:50 +00:00
|
|
|
});
|