2
0
mirror of https://github.com/iconify/iconify.git synced 2025-01-07 15:44:05 +00:00

fix(utils): do not use test data to get full emoji sequence

This commit is contained in:
Vjacheslav Trushkin 2023-01-01 20:59:37 +02:00
parent e443b2ae12
commit b6effea974
8 changed files with 32 additions and 168 deletions

View File

@ -3,7 +3,7 @@
"type": "module", "type": "module",
"description": "Common functions for working with Iconify icon sets used by various packages.", "description": "Common functions for working with Iconify icon sets used by various packages.",
"author": "Vjacheslav Trushkin", "author": "Vjacheslav Trushkin",
"version": "2.0.7", "version": "2.0.8",
"license": "MIT", "license": "MIT",
"bugs": "https://github.com/iconify/iconify/issues", "bugs": "https://github.com/iconify/iconify/issues",
"homepage": "https://iconify.design/", "homepage": "https://iconify.design/",

View File

@ -61,7 +61,7 @@ export function prepareEmojiForIconsList(
} }
// Get fully-qualified versions of emojis // Get fully-qualified versions of emojis
iconsList = getQualifiedEmojiVariations(iconsList, testData); iconsList = getQualifiedEmojiVariations(iconsList);
// Find and add missing emojis if test data is available // Find and add missing emojis if test data is available
if (testData) { if (testData) {

View File

@ -1,6 +1,5 @@
import { getSequenceFromEmojiStringOrKeyword } from '../cleanup'; import { getSequenceFromEmojiStringOrKeyword } from '../cleanup';
import { convertEmojiSequenceToUTF32 } from '../convert'; import { convertEmojiSequenceToUTF32 } from '../convert';
import type { EmojiTestData } from '../test/parse';
import { getQualifiedEmojiVariations } from '../test/variations'; import { getQualifiedEmojiVariations } from '../test/variations';
import { createEmojisTree, parseEmojiTree } from './tree'; import { createEmojisTree, parseEmojiTree } from './tree';
@ -37,10 +36,7 @@ export function createOptimisedRegexForEmojiSequences(
* *
* All examples above refer to the same emoji and will generate the same regex result * All examples above refer to the same emoji and will generate the same regex result
*/ */
export function createOptimisedRegex( export function createOptimisedRegex(emojis: (string | number[])[]): string {
emojis: (string | number[])[],
testData?: EmojiTestData
): string {
// Convert to numbers // Convert to numbers
let sequences = emojis.map((item) => let sequences = emojis.map((item) =>
typeof item === 'string' typeof item === 'string'
@ -55,8 +51,7 @@ export function createOptimisedRegex(
return { return {
sequence, sequence,
}; };
}), })
testData
).map((item) => item.sequence); ).map((item) => item.sequence);
// Parse // Parse

View File

@ -34,7 +34,7 @@ export function mapEmojiTestDataComponents(
for (const key in emojiComponents) { for (const key in emojiComponents) {
const type = key as EmojiComponentType; const type = key as EmojiComponentType;
const range = emojiComponents[type]; const range = emojiComponents[type];
for (let number = range[0]; number <= range[1]; number++) { for (let number = range[0]; number < range[1]; number++) {
const keyword = getEmojiSequenceKeyword([number]); const keyword = getEmojiSequenceKeyword([number]);
const item = testSequences[keyword]; const item = testSequences[keyword];
if (!item) { if (!item) {

View File

@ -4,9 +4,8 @@ import {
splitEmojiSequences, splitEmojiSequences,
} from '../cleanup'; } from '../cleanup';
import { convertEmojiSequenceToUTF32 } from '../convert'; import { convertEmojiSequenceToUTF32 } from '../convert';
import { keycapEmoji, vs16Emoji } from '../data'; import { emojiComponents, keycapEmoji, vs16Emoji } from '../data';
import { getEmojiSequenceKeyword } from '../format'; import { getEmojiSequenceKeyword } from '../format';
import type { EmojiTestData } from './parse';
/** /**
* Get qualified sequence, adding optional `FE0F` wherever it might exist * Get qualified sequence, adding optional `FE0F` wherever it might exist
@ -24,9 +23,21 @@ export function guessQualifiedEmojiSequence(sequence: number[]): number[] {
return part; return part;
} }
// Check for keycap // Check for keycap and components
if (part.length === 2 && part[1] === keycapEmoji) { if (part.length === 2) {
return [part[0], vs16Emoji, part[1]]; const lastNum = part[1];
if (lastNum === keycapEmoji) {
// emoji + keycap
return [part[0], vs16Emoji, lastNum];
}
for (const key in emojiComponents) {
const range =
emojiComponents[key as keyof typeof emojiComponents];
if (lastNum >= range[0] && lastNum < range[1]) {
// emoji + component
return [part[0], vs16Emoji, lastNum];
}
}
} }
// Add `FE0F` to 1 character emojis // Add `FE0F` to 1 character emojis
@ -50,33 +61,21 @@ interface BaseSequenceItem {
* Get qualified variations for emojis * Get qualified variations for emojis
* *
* Also converts list to UTF-32 as needed and removes duplicate items * Also converts list to UTF-32 as needed and removes duplicate items
*
* `testData`, returned by parseEmojiTestFile() is used to check which emojis have `FE0F` variations.
* If missing or emoji is missing in test data, `FE0F` is added to every single code emoji.
* It can also be an array of sequences.
*/ */
export function getQualifiedEmojiVariation<T extends BaseSequenceItem>( export function getQualifiedEmojiVariation<T extends BaseSequenceItem>(
item: T, item: T
testData?: EmojiTestData
): T { ): T {
// Convert to UTF-32, get unqualified sequence // Convert to UTF-32, get unqualified sequence
const unqualifiedSequence = getUnqualifiedEmojiSequence( const unqualifiedSequence = getUnqualifiedEmojiSequence(
convertEmojiSequenceToUTF32(item.sequence) convertEmojiSequenceToUTF32(item.sequence)
); );
// Check test data. Key is unqualified sequence
const key = getEmojiSequenceKeyword(unqualifiedSequence);
const testDataItem = testData?.[key];
const result: T = { const result: T = {
...item, ...item,
sequence: testDataItem sequence: guessQualifiedEmojiSequence(unqualifiedSequence),
? testDataItem.sequence
: guessQualifiedEmojiSequence(unqualifiedSequence),
}; };
if (result.sequenceKey) { if (result.sequenceKey) {
result.sequenceKey = key; result.sequenceKey = getEmojiSequenceKeyword(unqualifiedSequence);
} }
return result; return result;
} }
@ -85,14 +84,13 @@ export function getQualifiedEmojiVariation<T extends BaseSequenceItem>(
* Get qualified emoji variations for set of emojis, ignoring duplicate entries * Get qualified emoji variations for set of emojis, ignoring duplicate entries
*/ */
export function getQualifiedEmojiVariations<T extends BaseSequenceItem>( export function getQualifiedEmojiVariations<T extends BaseSequenceItem>(
items: T[], items: T[]
testData?: EmojiTestData
): T[] { ): T[] {
// Parse all sequences // Parse all sequences
const results = Object.create(null) as Record<string, T>; const results = Object.create(null) as Record<string, T>;
for (let i = 0; i < items.length; i++) { for (let i = 0; i < items.length; i++) {
const result = getQualifiedEmojiVariation(items[i], testData); const result = getQualifiedEmojiVariation(items[i]);
const key = getEmojiSequenceKeyword( const key = getEmojiSequenceKeyword(
getUnqualifiedEmojiSequence(result.sequence) getUnqualifiedEmojiSequence(result.sequence)
); );

View File

@ -1,45 +1,8 @@
import { readFile, writeFile, unlink } from 'node:fs/promises';
import { emojiVersion } from '../lib/emoji/data';
import { getEmojiSequenceFromString } from '../lib/emoji/cleanup'; import { getEmojiSequenceFromString } from '../lib/emoji/cleanup';
import { getEmojiSequenceString } from '../lib/emoji/format'; import { getEmojiSequenceString } from '../lib/emoji/format';
import { parseEmojiTestFile } from '../lib/emoji/test/parse';
import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations'; import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations';
describe('Qualified variations of emoji sequences', () => { describe('Qualified variations of emoji sequences', () => {
async function fetchEmojiTestData(): Promise<string | undefined> {
// Fetch emojis, cache it
const source = `tests/fixtures/download-emoji-${emojiVersion}.txt`;
let data: string | undefined;
try {
data = await readFile(source, 'utf8');
} catch {
//
}
if (!data) {
data = (
await fetch(
`https://unicode.org/Public/emoji/${emojiVersion}/emoji-test.txt`
)
)
.text()
.toString();
await writeFile(source, data, 'utf8');
}
// Test content, unlink cache on failure
if (data.indexOf(`# Version: ${emojiVersion}`) === -1) {
try {
await unlink(source);
} catch {
//
}
return;
}
return data;
}
it('Variations without test data', () => { it('Variations without test data', () => {
const sequences = [ const sequences = [
// simple emoji, twice to check duplicates // simple emoji, twice to check duplicates
@ -90,82 +53,8 @@ describe('Qualified variations of emoji sequences', () => {
'1F1E6 1F1F8', '1F1E6 1F1F8',
'1F3F4 E0067 E0062 E0065 E006E E0067 E007F', '1F3F4 E0067 E0062 E0065 E006E E0067 E007F',
// mix of simple and complex, with and without variation // mix of simple and complex, with and without variation
'1F9D7 1F3FE 200D 2640 FE0F', '1F9D7 FE0F 1F3FE 200D 2640 FE0F',
'1F9D7 1F3FF 200D 2642 FE0F', '1F9D7 FE0F 1F3FF 200D 2642 FE0F',
]);
});
it('Variations with test data', async () => {
// Fetch emojis, cache it
const data = await fetchEmojiTestData();
if (!data) {
console.warn('Test skipped: test data is not available');
return;
}
const testData = parseEmojiTestFile(data);
// Make sure testData keys contain only unqualified emojis
const testDataStrings = new Set(Object.keys(testData));
expect(testDataStrings.has('1f600')).toBe(true);
expect(testDataStrings.has('263a')).toBe(true);
expect(testDataStrings.has('263a-fe0f')).toBe(false);
// Make sure values contain qualified emojis
expect(testData['1f600'].sequence).toEqual([0x1f600]);
expect(testData['263a'].sequence).toEqual([0x263a, 0xfe0f]);
// Sequences to test
const sequences = [
// emoji without variation in test file
'1F601',
'1F635 200D 1F4AB',
// emojis without variations in test file, but variations in source
'1F60D FE0F',
// emoji that has variation in test file
'263A',
// keycap
'0030 20E3',
'0034 FE0F 20E3',
// complex emoji, exists in file
'1F9D1 1F3FE 200D 2764 200D 1F9D1 1F3FB',
// simple emoji, not in test file
'25F0',
// fake keycap, not in test file
'2345 20E3 200D 1235',
].map((source) => {
const sequence = getEmojiSequenceFromString(source);
return {
source,
sequence,
};
});
const results = getQualifiedEmojiVariations(sequences, testData);
expect(
results.map((item) =>
getEmojiSequenceString(item.sequence, {
separator: ' ',
case: 'upper',
format: 'utf-32',
add0: true,
})
)
).toEqual([
// emoji without variation in test file
'1F601',
'1F635 200D 1F4AB',
// emojis without variations in test file, but variations in source
'1F60D',
// emoji that has variation in test file
'263A FE0F',
// keycap
'0030 FE0F 20E3',
'0034 FE0F 20E3',
// complex emoji, exists in file
'1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FB',
// simple emoji, not in test file
'25F0 FE0F',
// fake keycap, not in test file
'2345 FE0F 20E3 200D 1235 FE0F',
]); ]);
}); });
}); });

View File

@ -96,25 +96,7 @@ describe('Testing unicode test data', () => {
return; return;
} }
// One emoji without variation // One emoji
expect(
prepareEmojiForIconsList(
{
'2615': 'hot-beverage',
},
data
)
).toEqual({
icons: [
{
icon: 'hot-beverage',
sequence: '2615',
},
],
regex: '\\u2615',
});
// One emoji with variation
expect( expect(
prepareEmojiForIconsList( prepareEmojiForIconsList(
{ {
@ -169,7 +151,7 @@ describe('Testing unicode test data', () => {
sequence: '270b-1f3ff', sequence: '270b-1f3ff',
}, },
], ],
regex: '\\u270B(?:\\uD83C[\\uDFFB-\\uDFFF])?', regex: '\\u270B(?:\\uD83C[\\uDFFB-\\uDFFF]|\\uFE0F?)',
}); });
// Multiple emojis, all without variations // Multiple emojis, all without variations
@ -202,7 +184,7 @@ describe('Testing unicode test data', () => {
sequence: '1f1e6-1f1ea', sequence: '1f1e6-1f1ea',
}, },
], ],
regex: '\\uD83C\\uDDE6\\uD83C[\\uDDE8-\\uDDEA]|\\u2615', regex: '\\uD83C\\uDDE6\\uD83C[\\uDDE8-\\uDDEA]|\\u2615\\uFE0F?',
}); });
}); });
}); });

View File

@ -389,7 +389,7 @@ describe('Finding emojis in text', () => {
}); });
// Get all icons // Get all icons
const iconsList = getQualifiedEmojiVariations(sequences, testData); const iconsList = getQualifiedEmojiVariations(sequences);
// Get regex // Get regex
const regexValue = createOptimisedRegexForEmojiSequences( const regexValue = createOptimisedRegexForEmojiSequences(