From 37e382d989600d1da8e609922b0962364fae8dc0 Mon Sep 17 00:00:00 2001 From: Vjacheslav Trushkin Date: Fri, 16 Dec 2022 10:48:17 +0200 Subject: [PATCH] feat(utils): function to find missing emojis, fixes for emoji functions --- packages/utils/package.json | 7 +- packages/utils/src/emoji/data.ts | 4 +- packages/utils/src/emoji/regex/create.ts | 4 +- packages/utils/src/emoji/test/copy.ts | 275 ++++++++++++++++++ packages/utils/src/emoji/test/name.ts | 85 ++++-- packages/utils/src/emoji/test/variations.ts | 10 +- packages/utils/src/index.ts | 3 +- .../tests/emoji-optional-variations-test.ts | 6 +- packages/utils/tests/emoji-testdata-test.ts | 104 ++++++- 9 files changed, 455 insertions(+), 43 deletions(-) create mode 100644 packages/utils/src/emoji/test/copy.ts diff --git a/packages/utils/package.json b/packages/utils/package.json index ecac880..104bd48 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -3,7 +3,7 @@ "type": "module", "description": "Common functions for working with Iconify icon sets used by various packages.", "author": "Vjacheslav Trushkin", - "version": "2.0.3", + "version": "2.0.4", "license": "MIT", "bugs": "https://github.com/iconify/iconify/issues", "homepage": "https://iconify.design/", @@ -142,6 +142,11 @@ "import": "./lib/emoji/test/components.mjs", "types": "./lib/emoji/test/components.d.ts" }, + "./lib/emoji/test/copy": { + "require": "./lib/emoji/test/copy.cjs", + "import": "./lib/emoji/test/copy.mjs", + "types": "./lib/emoji/test/copy.d.ts" + }, "./lib/emoji/test/name": { "require": "./lib/emoji/test/name.cjs", "import": "./lib/emoji/test/name.mjs", diff --git a/packages/utils/src/emoji/data.ts b/packages/utils/src/emoji/data.ts index d055764..1f517a3 100644 --- a/packages/utils/src/emoji/data.ts +++ b/packages/utils/src/emoji/data.ts @@ -19,10 +19,10 @@ export const keycapEmoji = 0x20e3; export type EmojiComponentType = 'skin-tone' | 'hair-style'; type Range = [number, number]; export const emojiComponents: Record = { - // Skin tones - 'skin-tone': [0x1f3fb, 0x1f400], // Hair styles 'hair-style': [0x1f9b0, 0x1f9b4], + // Skin tones + 'skin-tone': [0x1f3fb, 0x1f400], }; /** diff --git a/packages/utils/src/emoji/regex/create.ts b/packages/utils/src/emoji/regex/create.ts index a2c5646..26bbed9 100644 --- a/packages/utils/src/emoji/regex/create.ts +++ b/packages/utils/src/emoji/regex/create.ts @@ -1,6 +1,6 @@ import { getEmojiSequenceFromString } from '../cleanup'; import { convertEmojiSequenceToUTF32 } from '../convert'; -import { addQualifiedEmojiVariations } from '../test/variations'; +import { getQualifiedEmojiVariations } from '../test/variations'; import { createEmojisTree, parseEmojiTree } from './tree'; /** @@ -46,7 +46,7 @@ export function createOptimisedRegex( ); // Add variations - sequences = addQualifiedEmojiVariations(sequences, testData); + sequences = getQualifiedEmojiVariations(sequences, testData); // Parse return createOptimisedRegexForEmojiSequences(sequences); diff --git a/packages/utils/src/emoji/test/copy.ts b/packages/utils/src/emoji/test/copy.ts new file mode 100644 index 0000000..f07c71a --- /dev/null +++ b/packages/utils/src/emoji/test/copy.ts @@ -0,0 +1,275 @@ +import { getUnqualifiedEmojiSequence } from '../cleanup'; +import { emojiComponents, EmojiComponentType } from '../data'; +import { getEmojiSequenceString } from '../format'; +import { mapEmojiTestDataComponents } from './components'; +import { EmojiComponentsMapItem, getEmojiComponentsMap } from './name'; +import { EmojiTestDataItem, mapEmojiTestDataBySequence } from './parse'; + +type SequenceType = 'qualified' | 'unqualified'; +interface SequenceData { + type: SequenceType; + sequence: number[]; + key: string; +} +type Sequences = Record; + +type ComponentsIteration = Required>; + +/** + * Get components iteration + */ +function addToComponentsIteration( + components: ComponentsIteration, + attr: EmojiComponentType, + value: number +): ComponentsIteration { + const result: ComponentsIteration = { + 'hair-style': components['hair-style'].slice(0), + 'skin-tone': components['skin-tone'].slice(0), + }; + result[attr].push(value); + return result; +} + +/** + * Replace components with number in sequence + */ +function addComponentsToSequence( + sequence: (EmojiComponentType | number)[], + components: ComponentsIteration +): number[] { + const indexes: Required> = { + 'hair-style': 0, + 'skin-tone': 0, + }; + return sequence.map((value) => { + if (typeof value === 'number') { + return value; + } + const index = indexes[value]++; + return components[value][index]; + }); +} + +/** + * Get sequence variations + */ +function getSequence(sequence: number[]): Sequences { + const qualified: SequenceData = { + type: 'qualified', + sequence, + key: getEmojiSequenceString(sequence), + }; + + const unqualifiedSequence = getUnqualifiedEmojiSequence(sequence); + const unqualified: SequenceData = + unqualifiedSequence.length === sequence.length + ? { + ...qualified, + type: 'unqualified', + } + : { + type: 'unqualified', + sequence: unqualifiedSequence, + key: getEmojiSequenceString(unqualifiedSequence), + }; + + return { + qualified, + unqualified, + }; +} + +/** + * Item to copy + */ +interface EmojiSequenceToCopy { + // Source: sequence and name + source: number[]; + sourceName: string; + + // Target: sequence and name + target: number[]; + targetName: string; +} + +/** + * Get sequences + * + * Returns map, where key is item to add, value is source + */ +export function getEmojisSequencesToCopy( + sequences: number[][], + testData: EmojiTestDataItem[] +): EmojiSequenceToCopy[] { + const results: EmojiSequenceToCopy[] = []; + + // Prepare stuff + const componentsMap = mapEmojiTestDataComponents( + mapEmojiTestDataBySequence(testData, getEmojiSequenceString), + getEmojiSequenceString + ); + const componentsMapItems = getEmojiComponentsMap(testData, componentsMap); + + // Get all existing emojis + const existingItems = Object.create(null) as Record; + const copiedItems = Object.create(null) as Record; + sequences.forEach((sequence) => { + existingItems[getEmojiSequenceString(sequence)] = sequence; + }); + + // Check if item exists + const itemExists = (sequence: Sequences): SequenceType | undefined => { + return existingItems[sequence.qualified.key] + ? 'qualified' + : existingItems[sequence.unqualified.key] + ? 'unqualified' + : void 0; + }; + const itemWasCopied = (sequence: Sequences): SequenceType | undefined => { + return copiedItems[sequence.qualified.key] + ? 'qualified' + : copiedItems[sequence.unqualified.key] + ? 'unqualified' + : void 0; + }; + + // Copy item + const addToCopy = ( + source: SequenceData, + sourceName: string, + target: SequenceData, + targetName: string + ) => { + copiedItems[target.key] = target.sequence; + results.push({ + source: source.sequence, + sourceName, + target: target.sequence, + targetName, + }); + }; + + // Get name + const getName = ( + item: EmojiComponentsMapItem, + components: ComponentsIteration + ) => { + let name = item.name; + for (const key in emojiComponents) { + const type = key as EmojiComponentType; + for (let i = 0; i < components[type].length; i++) { + const num = components[type][i]; + const text = componentsMap.names.get(num) as string; + name = name.replace(`{${type}-${i}}`, text); + } + } + return name; + }; + + // Check item and its children + const checkItem = ( + parentItem: EmojiComponentsMapItem, + parentSequence: SequenceData, + parentComponents: ComponentsIteration, + onlyIfExists = true + ) => { + const children = parentItem.children; + if (!children) { + return; + } + for (const key in emojiComponents) { + const type = key as EmojiComponentType; + if (children[type]) { + // Check emojis + const childItem = children[type]; + const range = emojiComponents[type]; + + // Add each item in range + for (let num = range[0]; num < range[1]; num++) { + const components = addToComponentsIteration( + parentComponents, + type, + num + ); + const sequence = addComponentsToSequence( + childItem.sequence, + components + ); + const sequences = getSequence(sequence); + + // Check if already exists + const existingSequence = itemExists(sequences); + if (existingSequence) { + // Already exists + checkItem( + childItem, + sequences[existingSequence], + components, + onlyIfExists + ); + continue; + } + + // Check if was copied + let copiedSequence = itemWasCopied(sequences); + if (copiedSequence && onlyIfExists) { + // Cannot parse nested items yet + continue; + } + + // Copy + if (!copiedSequence) { + // Copy sequence + copiedSequence = parentSequence.type; + addToCopy( + parentSequence, + getName(parentItem, parentComponents), + sequences[copiedSequence], + getName(childItem, components) + ); + } + + // Check child items + checkItem( + childItem, + sequences[copiedSequence], + components, + onlyIfExists + ); + } + } + } + }; + + // Check all items + componentsMapItems.forEach((mainItem) => { + const sequence = getSequence(mainItem.sequence as number[]); + const type = itemExists(sequence); + if (!type) { + // Base emoji is missing: nothing to do + return; + } + + checkItem( + mainItem, + sequence[type], + { + 'hair-style': [], + 'skin-tone': [], + }, + true + ); + checkItem( + mainItem, + sequence[type], + { + 'hair-style': [], + 'skin-tone': [], + }, + false + ); + }); + + return results; +} diff --git a/packages/utils/src/emoji/test/name.ts b/packages/utils/src/emoji/test/name.ts index da22dcd..23ab52a 100644 --- a/packages/utils/src/emoji/test/name.ts +++ b/packages/utils/src/emoji/test/name.ts @@ -113,11 +113,24 @@ function mergeComponentTypes(value: EmojiComponentType[]) { return '[' + value.join(',') + ']'; } +type ComponentsCount = Required>; + +function mergeComponentsCount(value: ComponentsCount) { + const keys: EmojiComponentType[] = []; + for (const key in emojiComponents) { + const type = key as EmojiComponentType; + for (let i = 0; i < value[type]; i++) { + keys.push(type); + } + } + return keys.length ? mergeComponentTypes(keys) : ''; +} + /** * Map item */ type EmojiComponentsMapItemSequence = (EmojiComponentType | number)[]; -interface EmojiComponentsMapItem { +export interface EmojiComponentsMapItem { // Name, with `{skin-tone-1}` (type + index) placeholders name: string; @@ -135,17 +148,16 @@ interface EmojiComponentsMapItem { * Only sequences with components are returned */ export function getEmojiComponentsMap( - testData: EmojiTestDataItem[] + testData: EmojiTestDataItem[], + componentsMap?: EmojiTestDataComponentsMap ): EmojiComponentsMapItem[] { // Prepare stuff - const mappedTestData = mapEmojiTestDataBySequence( - testData, - getEmojiSequenceString - ); - const components = mapEmojiTestDataComponents( - mappedTestData, - getEmojiSequenceString - ); + const components = + componentsMap || + mapEmojiTestDataComponents( + mapEmojiTestDataBySequence(testData, getEmojiSequenceString), + getEmojiSequenceString + ); // Function to clean sequence const cleanSequence = (sequence: number[]): string => { @@ -160,7 +172,7 @@ export function getEmojiComponentsMap( interface SplitListItem { item: EmojiTestDataItem; split: SplitEmojiName; - components: EmojiComponentType[]; + components: ComponentsCount; } type SplitList = Record; const splitData = Object.create(null) as Record; @@ -179,16 +191,18 @@ export function getEmojiComponentsMap( // Create unique key based on component types let sequenceKey = defaultSplitDataKey; - const itemComponents: EmojiComponentType[] = []; + const itemComponents: ComponentsCount = { + 'hair-style': 0, + 'skin-tone': 0, + }; if (split.components) { split.variations?.forEach((item) => { if (typeof item !== 'string') { - itemComponents.push(item.type); + itemComponents[item.type]++; } }); - if (itemComponents.length) { - sequenceKey = mergeComponentTypes(itemComponents); - } + sequenceKey = + mergeComponentsCount(itemComponents) || defaultSplitDataKey; } // Get item if already exists @@ -228,11 +242,9 @@ export function getEmojiComponentsMap( // Function to get item const getItem = ( - components: EmojiComponentType[] + components: ComponentsCount ): EmojiComponentsMapItem | undefined => { - const key = components.length - ? mergeComponentTypes(components) - : defaultSplitDataKey; + const key = mergeComponentsCount(components) || defaultSplitDataKey; const item = items[key]; if (!item) { return; @@ -253,15 +265,19 @@ export function getEmojiComponentsMap( }); // Get name - let counter = 0; + const counter: ComponentsCount = { + 'hair-style': 0, + 'skin-tone': 0, + }; const nameVariations = variations?.map((chunk) => { if (typeof chunk === 'string') { return chunk; } - if (components[counter] !== chunk.type) { + const count = counter[chunk.type]++; + if (components[chunk.type] < count) { throw new Error('Bad variations order'); } - return `{${chunk.type}-${counter++}}`; + return `{${chunk.type}-${count}}`; }); const name = split.base + @@ -277,16 +293,21 @@ export function getEmojiComponentsMap( const checkChildren = ( parent: EmojiComponentsMapItem, - components: EmojiComponentType[] + components: ComponentsCount ): boolean => { // Attempt to add each type let found = false; for (const key in emojiComponents) { const type = key as EmojiComponentType; - const childComponents = components.concat([type]); + + // Find child item + const childComponents = { + ...components, + }; + childComponents[type]++; + const childItem = getItem(childComponents); // Get sequence for child item - const childItem = getItem(childComponents); if (childItem) { found = true; @@ -305,9 +326,17 @@ export function getEmojiComponentsMap( }; // Get main item - const mainItem = getItem([]); + const mainItem = getItem({ + 'hair-style': 0, + 'skin-tone': 0, + }); if (mainItem) { - if (checkChildren(mainItem, [])) { + if ( + checkChildren(mainItem, { + 'hair-style': 0, + 'skin-tone': 0, + }) + ) { // Found item with children results.push(mainItem); } diff --git a/packages/utils/src/emoji/test/variations.ts b/packages/utils/src/emoji/test/variations.ts index b4fb2d5..1f0f1fe 100644 --- a/packages/utils/src/emoji/test/variations.ts +++ b/packages/utils/src/emoji/test/variations.ts @@ -37,24 +37,24 @@ export function guessQualifiedEmojiSequence(sequence: number[]): number[] { } /** - * Add qualified variations to emojis + * Get qualified variations for emojis * - * Also converts list to UTF-32 as needed + * Also converts list to UTF-32 as needed and removes duplicate items * * `testData`, returned by parseEmojiTestFile() is used to check which emojis have `FE0F` variations. * If missing or emoji is missing in test data, `FE0F` is added to every single code emoji. * It can also be an array of sequences. */ -export function addQualifiedEmojiVariations( +export function getQualifiedEmojiVariations( sequences: number[][], testData?: (number[] | EmojiTestDataItem)[] ): number[][]; -export function addQualifiedEmojiVariations( +export function getQualifiedEmojiVariations( sequences: number[][], testData: (number[] | EmojiTestDataItem)[], toString: (value: number[]) => string ): string[]; -export function addQualifiedEmojiVariations( +export function getQualifiedEmojiVariations( sequences: number[][], testData: (number[] | EmojiTestDataItem)[] = [], toString?: (value: number[]) => string diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 437bdaf..6232a4b 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -105,7 +105,8 @@ export { parseEmojiTestFile, getQualifiedEmojiSequencesMap, } from './emoji/test/parse'; -export { addQualifiedEmojiVariations as addOptionalEmojiVariations } from './emoji/test/variations'; +export { getQualifiedEmojiVariations } from './emoji/test/variations'; +export { getEmojisSequencesToCopy } from './emoji/test/copy'; export { createOptimisedRegex, createOptimisedRegexForEmojiSequences, diff --git a/packages/utils/tests/emoji-optional-variations-test.ts b/packages/utils/tests/emoji-optional-variations-test.ts index 29a48e2..326e419 100644 --- a/packages/utils/tests/emoji-optional-variations-test.ts +++ b/packages/utils/tests/emoji-optional-variations-test.ts @@ -6,7 +6,7 @@ import { getQualifiedEmojiSequencesMap, parseEmojiTestFile, } from '../lib/emoji/test/parse'; -import { addQualifiedEmojiVariations } from '../lib/emoji/test/variations'; +import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations'; describe('Qualified variations of emoji sequences', () => { async function fetchEmojiTestData(): Promise { @@ -62,7 +62,7 @@ describe('Qualified variations of emoji sequences', () => { '1F9D7 1F3FE 200D 2640 FE0F', '1F9D7 1F3FF 200D 2642 ', ].map(getEmojiSequenceFromString); - const results = addQualifiedEmojiVariations(sequences); + const results = getQualifiedEmojiVariations(sequences); expect( results.map((sequence) => getEmojiSequenceString(sequence, { @@ -135,7 +135,7 @@ describe('Qualified variations of emoji sequences', () => { // fake keycap, not in test file '2345 20E3 200D 1235', ].map(getEmojiSequenceFromString); - const results = addQualifiedEmojiVariations( + const results = getQualifiedEmojiVariations( sequences, testDataSequences ); diff --git a/packages/utils/tests/emoji-testdata-test.ts b/packages/utils/tests/emoji-testdata-test.ts index 886c227..5ca160e 100644 --- a/packages/utils/tests/emoji-testdata-test.ts +++ b/packages/utils/tests/emoji-testdata-test.ts @@ -22,6 +22,8 @@ import { SplitEmojiName, getEmojiComponentsMap, } from '../lib/emoji/test/name'; +import { getEmojisSequencesToCopy } from '../lib/emoji/test/copy'; +import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations'; describe('Testing unicode test data', () => { async function fetchEmojiTestData(): Promise { @@ -547,7 +549,7 @@ describe('Testing unicode test data', () => { sequence: [0x1f469, 'skin-tone'], children: { 'hair-style': { - name: 'woman: {skin-tone-0}, {hair-style-1}', + name: 'woman: {skin-tone-0}, {hair-style-0}', sequence: [ 0x1f469, 'skin-tone', @@ -560,8 +562,108 @@ describe('Testing unicode test data', () => { 'hair-style': { name: 'woman: {hair-style-0}', sequence: [0x1f469, 0x200d, 'hair-style'], + children: { + 'skin-tone': { + name: 'woman: {skin-tone-0}, {hair-style-0}', + sequence: [ + 0x1f469, + 'skin-tone', + 0x200d, + 'hair-style', + ], + }, + }, + }, + }, + }); + + // Item with multiple skin tones + const item4 = map.find( + (item) => sequenceToString(item.sequence) === '1f46b' + ); + expect(item4).toEqual({ + name: 'woman and man holding hands', + sequence: [0x1f46b], + children: { + 'skin-tone': { + name: 'woman and man holding hands: {skin-tone-0}', + sequence: [0x1f46b, 'skin-tone'], + children: { + 'skin-tone': { + name: 'woman and man holding hands: {skin-tone-0}, {skin-tone-1}', + sequence: [ + 0x1f469, + 'skin-tone', + 0x200d, + 0x1f91d, + 0x200d, + 0x1f468, + 'skin-tone', + ], + }, + }, }, }, }); }); + + it('Checking for missing sequences', () => { + if (!data) { + console.warn('Test skipped: test data is not available'); + return; + } + + const testData = parseEmojiTestFile(data); + const sequences = getQualifiedEmojiVariations( + testData.map((item) => item.sequence), + testData + ); + + const missing = getEmojisSequencesToCopy(sequences, testData); + + // Should be 30 entries for 15.0 + // TODO: update for newer versions + expect(missing.length).toBe(30); + + // Two identical tones. Not a valid emoji, but optimises regex + expect( + missing.find( + (item) => item.sourceName === 'handshake: light skin tone' + ) + ).toEqual({ + source: [0x1f91d, 0x1f3fb], + sourceName: 'handshake: light skin tone', + target: [0x1faf1, 0x1f3fb, 0x200d, 0x1faf2, 0x1f3fb], + targetName: 'handshake: light skin tone, light skin tone', + }); + + // Check with custom data: only base icon + const missing2 = getEmojisSequencesToCopy([[0x1f91d]], testData); + + // Missing icons: [skin-tone], [skin-tone, skin-tone] + expect(missing2.length).toBe(5 + 5 * 5); + expect( + missing2.find( + (item) => item.targetName === 'handshake: light skin tone' + ) + ).toEqual({ + source: [0x1f91d], + sourceName: 'handshake', + target: [0x1f91d, 0x1f3fb], + targetName: 'handshake: light skin tone', + }); + expect( + missing2.find( + (item) => + item.targetName === + 'handshake: medium-light skin tone, light skin tone' + ) + ).toEqual({ + // Should be copied from first component match + source: [0x1f91d, 0x1f3fc], + sourceName: 'handshake: medium-light skin tone', + target: [0x1faf1, 0x1f3fc, 0x200d, 0x1faf2, 0x1f3fb], + targetName: 'handshake: medium-light skin tone, light skin tone', + }); + }); });