2
0
mirror of https://github.com/iconify/iconify.git synced 2025-01-22 14:48:24 +00:00

fix(utils): do not use test data to get full emoji sequence

This commit is contained in:
Vjacheslav Trushkin 2023-01-01 20:59:37 +02:00
parent e443b2ae12
commit b6effea974
8 changed files with 32 additions and 168 deletions

View File

@ -3,7 +3,7 @@
"type": "module",
"description": "Common functions for working with Iconify icon sets used by various packages.",
"author": "Vjacheslav Trushkin",
"version": "2.0.7",
"version": "2.0.8",
"license": "MIT",
"bugs": "https://github.com/iconify/iconify/issues",
"homepage": "https://iconify.design/",

View File

@ -61,7 +61,7 @@ export function prepareEmojiForIconsList(
}
// Get fully-qualified versions of emojis
iconsList = getQualifiedEmojiVariations(iconsList, testData);
iconsList = getQualifiedEmojiVariations(iconsList);
// Find and add missing emojis if test data is available
if (testData) {

View File

@ -1,6 +1,5 @@
import { getSequenceFromEmojiStringOrKeyword } from '../cleanup';
import { convertEmojiSequenceToUTF32 } from '../convert';
import type { EmojiTestData } from '../test/parse';
import { getQualifiedEmojiVariations } from '../test/variations';
import { createEmojisTree, parseEmojiTree } from './tree';
@ -37,10 +36,7 @@ export function createOptimisedRegexForEmojiSequences(
*
* All examples above refer to the same emoji and will generate the same regex result
*/
export function createOptimisedRegex(
emojis: (string | number[])[],
testData?: EmojiTestData
): string {
export function createOptimisedRegex(emojis: (string | number[])[]): string {
// Convert to numbers
let sequences = emojis.map((item) =>
typeof item === 'string'
@ -55,8 +51,7 @@ export function createOptimisedRegex(
return {
sequence,
};
}),
testData
})
).map((item) => item.sequence);
// Parse

View File

@ -34,7 +34,7 @@ export function mapEmojiTestDataComponents(
for (const key in emojiComponents) {
const type = key as EmojiComponentType;
const range = emojiComponents[type];
for (let number = range[0]; number <= range[1]; number++) {
for (let number = range[0]; number < range[1]; number++) {
const keyword = getEmojiSequenceKeyword([number]);
const item = testSequences[keyword];
if (!item) {

View File

@ -4,9 +4,8 @@ import {
splitEmojiSequences,
} from '../cleanup';
import { convertEmojiSequenceToUTF32 } from '../convert';
import { keycapEmoji, vs16Emoji } from '../data';
import { emojiComponents, keycapEmoji, vs16Emoji } from '../data';
import { getEmojiSequenceKeyword } from '../format';
import type { EmojiTestData } from './parse';
/**
* Get qualified sequence, adding optional `FE0F` wherever it might exist
@ -24,9 +23,21 @@ export function guessQualifiedEmojiSequence(sequence: number[]): number[] {
return part;
}
// Check for keycap
if (part.length === 2 && part[1] === keycapEmoji) {
return [part[0], vs16Emoji, part[1]];
// Check for keycap and components
if (part.length === 2) {
const lastNum = part[1];
if (lastNum === keycapEmoji) {
// emoji + keycap
return [part[0], vs16Emoji, lastNum];
}
for (const key in emojiComponents) {
const range =
emojiComponents[key as keyof typeof emojiComponents];
if (lastNum >= range[0] && lastNum < range[1]) {
// emoji + component
return [part[0], vs16Emoji, lastNum];
}
}
}
// Add `FE0F` to 1 character emojis
@ -50,33 +61,21 @@ interface BaseSequenceItem {
* Get qualified variations for emojis
*
* Also converts list to UTF-32 as needed and removes duplicate items
*
* `testData`, returned by parseEmojiTestFile() is used to check which emojis have `FE0F` variations.
* If missing or emoji is missing in test data, `FE0F` is added to every single code emoji.
* It can also be an array of sequences.
*/
export function getQualifiedEmojiVariation<T extends BaseSequenceItem>(
item: T,
testData?: EmojiTestData
item: T
): T {
// Convert to UTF-32, get unqualified sequence
const unqualifiedSequence = getUnqualifiedEmojiSequence(
convertEmojiSequenceToUTF32(item.sequence)
);
// Check test data. Key is unqualified sequence
const key = getEmojiSequenceKeyword(unqualifiedSequence);
const testDataItem = testData?.[key];
const result: T = {
...item,
sequence: testDataItem
? testDataItem.sequence
: guessQualifiedEmojiSequence(unqualifiedSequence),
sequence: guessQualifiedEmojiSequence(unqualifiedSequence),
};
if (result.sequenceKey) {
result.sequenceKey = key;
result.sequenceKey = getEmojiSequenceKeyword(unqualifiedSequence);
}
return result;
}
@ -85,14 +84,13 @@ export function getQualifiedEmojiVariation<T extends BaseSequenceItem>(
* Get qualified emoji variations for set of emojis, ignoring duplicate entries
*/
export function getQualifiedEmojiVariations<T extends BaseSequenceItem>(
items: T[],
testData?: EmojiTestData
items: T[]
): T[] {
// Parse all sequences
const results = Object.create(null) as Record<string, T>;
for (let i = 0; i < items.length; i++) {
const result = getQualifiedEmojiVariation(items[i], testData);
const result = getQualifiedEmojiVariation(items[i]);
const key = getEmojiSequenceKeyword(
getUnqualifiedEmojiSequence(result.sequence)
);

View File

@ -1,45 +1,8 @@
import { readFile, writeFile, unlink } from 'node:fs/promises';
import { emojiVersion } from '../lib/emoji/data';
import { getEmojiSequenceFromString } from '../lib/emoji/cleanup';
import { getEmojiSequenceString } from '../lib/emoji/format';
import { parseEmojiTestFile } from '../lib/emoji/test/parse';
import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations';
describe('Qualified variations of emoji sequences', () => {
async function fetchEmojiTestData(): Promise<string | undefined> {
// Fetch emojis, cache it
const source = `tests/fixtures/download-emoji-${emojiVersion}.txt`;
let data: string | undefined;
try {
data = await readFile(source, 'utf8');
} catch {
//
}
if (!data) {
data = (
await fetch(
`https://unicode.org/Public/emoji/${emojiVersion}/emoji-test.txt`
)
)
.text()
.toString();
await writeFile(source, data, 'utf8');
}
// Test content, unlink cache on failure
if (data.indexOf(`# Version: ${emojiVersion}`) === -1) {
try {
await unlink(source);
} catch {
//
}
return;
}
return data;
}
it('Variations without test data', () => {
const sequences = [
// simple emoji, twice to check duplicates
@ -90,82 +53,8 @@ describe('Qualified variations of emoji sequences', () => {
'1F1E6 1F1F8',
'1F3F4 E0067 E0062 E0065 E006E E0067 E007F',
// mix of simple and complex, with and without variation
'1F9D7 1F3FE 200D 2640 FE0F',
'1F9D7 1F3FF 200D 2642 FE0F',
]);
});
it('Variations with test data', async () => {
// Fetch emojis, cache it
const data = await fetchEmojiTestData();
if (!data) {
console.warn('Test skipped: test data is not available');
return;
}
const testData = parseEmojiTestFile(data);
// Make sure testData keys contain only unqualified emojis
const testDataStrings = new Set(Object.keys(testData));
expect(testDataStrings.has('1f600')).toBe(true);
expect(testDataStrings.has('263a')).toBe(true);
expect(testDataStrings.has('263a-fe0f')).toBe(false);
// Make sure values contain qualified emojis
expect(testData['1f600'].sequence).toEqual([0x1f600]);
expect(testData['263a'].sequence).toEqual([0x263a, 0xfe0f]);
// Sequences to test
const sequences = [
// emoji without variation in test file
'1F601',
'1F635 200D 1F4AB',
// emojis without variations in test file, but variations in source
'1F60D FE0F',
// emoji that has variation in test file
'263A',
// keycap
'0030 20E3',
'0034 FE0F 20E3',
// complex emoji, exists in file
'1F9D1 1F3FE 200D 2764 200D 1F9D1 1F3FB',
// simple emoji, not in test file
'25F0',
// fake keycap, not in test file
'2345 20E3 200D 1235',
].map((source) => {
const sequence = getEmojiSequenceFromString(source);
return {
source,
sequence,
};
});
const results = getQualifiedEmojiVariations(sequences, testData);
expect(
results.map((item) =>
getEmojiSequenceString(item.sequence, {
separator: ' ',
case: 'upper',
format: 'utf-32',
add0: true,
})
)
).toEqual([
// emoji without variation in test file
'1F601',
'1F635 200D 1F4AB',
// emojis without variations in test file, but variations in source
'1F60D',
// emoji that has variation in test file
'263A FE0F',
// keycap
'0030 FE0F 20E3',
'0034 FE0F 20E3',
// complex emoji, exists in file
'1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FB',
// simple emoji, not in test file
'25F0 FE0F',
// fake keycap, not in test file
'2345 FE0F 20E3 200D 1235 FE0F',
'1F9D7 FE0F 1F3FE 200D 2640 FE0F',
'1F9D7 FE0F 1F3FF 200D 2642 FE0F',
]);
});
});

View File

@ -96,25 +96,7 @@ describe('Testing unicode test data', () => {
return;
}
// One emoji without variation
expect(
prepareEmojiForIconsList(
{
'2615': 'hot-beverage',
},
data
)
).toEqual({
icons: [
{
icon: 'hot-beverage',
sequence: '2615',
},
],
regex: '\\u2615',
});
// One emoji with variation
// One emoji
expect(
prepareEmojiForIconsList(
{
@ -169,7 +151,7 @@ describe('Testing unicode test data', () => {
sequence: '270b-1f3ff',
},
],
regex: '\\u270B(?:\\uD83C[\\uDFFB-\\uDFFF])?',
regex: '\\u270B(?:\\uD83C[\\uDFFB-\\uDFFF]|\\uFE0F?)',
});
// Multiple emojis, all without variations
@ -202,7 +184,7 @@ describe('Testing unicode test data', () => {
sequence: '1f1e6-1f1ea',
},
],
regex: '\\uD83C\\uDDE6\\uD83C[\\uDDE8-\\uDDEA]|\\u2615',
regex: '\\uD83C\\uDDE6\\uD83C[\\uDDE8-\\uDDEA]|\\u2615\\uFE0F?',
});
});
});

View File

@ -389,7 +389,7 @@ describe('Finding emojis in text', () => {
});
// Get all icons
const iconsList = getQualifiedEmojiVariations(sequences, testData);
const iconsList = getQualifiedEmojiVariations(sequences);
// Get regex
const regexValue = createOptimisedRegexForEmojiSequences(