From b6effea97491b71b82dbdb9fab4776585135b856 Mon Sep 17 00:00:00 2001 From: Vjacheslav Trushkin Date: Sun, 1 Jan 2023 20:59:37 +0200 Subject: [PATCH] fix(utils): do not use test data to get full emoji sequence --- packages/utils/package.json | 2 +- packages/utils/src/emoji/parse.ts | 2 +- packages/utils/src/emoji/regex/create.ts | 9 +- packages/utils/src/emoji/test/components.ts | 2 +- packages/utils/src/emoji/test/variations.ts | 44 ++++--- .../tests/emoji-optional-variations-test.ts | 115 +----------------- packages/utils/tests/emoji-parse-test.ts | 24 +--- packages/utils/tests/emoji-regex-find-test.ts | 2 +- 8 files changed, 32 insertions(+), 168 deletions(-) diff --git a/packages/utils/package.json b/packages/utils/package.json index f879edc..f346f5a 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -3,7 +3,7 @@ "type": "module", "description": "Common functions for working with Iconify icon sets used by various packages.", "author": "Vjacheslav Trushkin", - "version": "2.0.7", + "version": "2.0.8", "license": "MIT", "bugs": "https://github.com/iconify/iconify/issues", "homepage": "https://iconify.design/", diff --git a/packages/utils/src/emoji/parse.ts b/packages/utils/src/emoji/parse.ts index 3ff6660..b7d5947 100644 --- a/packages/utils/src/emoji/parse.ts +++ b/packages/utils/src/emoji/parse.ts @@ -61,7 +61,7 @@ export function prepareEmojiForIconsList( } // Get fully-qualified versions of emojis - iconsList = getQualifiedEmojiVariations(iconsList, testData); + iconsList = getQualifiedEmojiVariations(iconsList); // Find and add missing emojis if test data is available if (testData) { diff --git a/packages/utils/src/emoji/regex/create.ts b/packages/utils/src/emoji/regex/create.ts index 11a872a..a7bd2e7 100644 --- a/packages/utils/src/emoji/regex/create.ts +++ b/packages/utils/src/emoji/regex/create.ts @@ -1,6 +1,5 @@ import { getSequenceFromEmojiStringOrKeyword } from '../cleanup'; import { convertEmojiSequenceToUTF32 } from '../convert'; -import type { EmojiTestData } from '../test/parse'; import { getQualifiedEmojiVariations } from '../test/variations'; import { createEmojisTree, parseEmojiTree } from './tree'; @@ -37,10 +36,7 @@ export function createOptimisedRegexForEmojiSequences( * * All examples above refer to the same emoji and will generate the same regex result */ -export function createOptimisedRegex( - emojis: (string | number[])[], - testData?: EmojiTestData -): string { +export function createOptimisedRegex(emojis: (string | number[])[]): string { // Convert to numbers let sequences = emojis.map((item) => typeof item === 'string' @@ -55,8 +51,7 @@ export function createOptimisedRegex( return { sequence, }; - }), - testData + }) ).map((item) => item.sequence); // Parse diff --git a/packages/utils/src/emoji/test/components.ts b/packages/utils/src/emoji/test/components.ts index 9234af6..9f9169e 100644 --- a/packages/utils/src/emoji/test/components.ts +++ b/packages/utils/src/emoji/test/components.ts @@ -34,7 +34,7 @@ export function mapEmojiTestDataComponents( for (const key in emojiComponents) { const type = key as EmojiComponentType; const range = emojiComponents[type]; - for (let number = range[0]; number <= range[1]; number++) { + for (let number = range[0]; number < range[1]; number++) { const keyword = getEmojiSequenceKeyword([number]); const item = testSequences[keyword]; if (!item) { diff --git a/packages/utils/src/emoji/test/variations.ts b/packages/utils/src/emoji/test/variations.ts index 27c7c85..4a9e471 100644 --- a/packages/utils/src/emoji/test/variations.ts +++ b/packages/utils/src/emoji/test/variations.ts @@ -4,9 +4,8 @@ import { splitEmojiSequences, } from '../cleanup'; import { convertEmojiSequenceToUTF32 } from '../convert'; -import { keycapEmoji, vs16Emoji } from '../data'; +import { emojiComponents, keycapEmoji, vs16Emoji } from '../data'; import { getEmojiSequenceKeyword } from '../format'; -import type { EmojiTestData } from './parse'; /** * Get qualified sequence, adding optional `FE0F` wherever it might exist @@ -24,9 +23,21 @@ export function guessQualifiedEmojiSequence(sequence: number[]): number[] { return part; } - // Check for keycap - if (part.length === 2 && part[1] === keycapEmoji) { - return [part[0], vs16Emoji, part[1]]; + // Check for keycap and components + if (part.length === 2) { + const lastNum = part[1]; + if (lastNum === keycapEmoji) { + // emoji + keycap + return [part[0], vs16Emoji, lastNum]; + } + for (const key in emojiComponents) { + const range = + emojiComponents[key as keyof typeof emojiComponents]; + if (lastNum >= range[0] && lastNum < range[1]) { + // emoji + component + return [part[0], vs16Emoji, lastNum]; + } + } } // Add `FE0F` to 1 character emojis @@ -50,33 +61,21 @@ interface BaseSequenceItem { * Get qualified variations for emojis * * Also converts list to UTF-32 as needed and removes duplicate items - * - * `testData`, returned by parseEmojiTestFile() is used to check which emojis have `FE0F` variations. - * If missing or emoji is missing in test data, `FE0F` is added to every single code emoji. - * It can also be an array of sequences. */ - export function getQualifiedEmojiVariation( - item: T, - testData?: EmojiTestData + item: T ): T { // Convert to UTF-32, get unqualified sequence const unqualifiedSequence = getUnqualifiedEmojiSequence( convertEmojiSequenceToUTF32(item.sequence) ); - // Check test data. Key is unqualified sequence - const key = getEmojiSequenceKeyword(unqualifiedSequence); - const testDataItem = testData?.[key]; - const result: T = { ...item, - sequence: testDataItem - ? testDataItem.sequence - : guessQualifiedEmojiSequence(unqualifiedSequence), + sequence: guessQualifiedEmojiSequence(unqualifiedSequence), }; if (result.sequenceKey) { - result.sequenceKey = key; + result.sequenceKey = getEmojiSequenceKeyword(unqualifiedSequence); } return result; } @@ -85,14 +84,13 @@ export function getQualifiedEmojiVariation( * Get qualified emoji variations for set of emojis, ignoring duplicate entries */ export function getQualifiedEmojiVariations( - items: T[], - testData?: EmojiTestData + items: T[] ): T[] { // Parse all sequences const results = Object.create(null) as Record; for (let i = 0; i < items.length; i++) { - const result = getQualifiedEmojiVariation(items[i], testData); + const result = getQualifiedEmojiVariation(items[i]); const key = getEmojiSequenceKeyword( getUnqualifiedEmojiSequence(result.sequence) ); diff --git a/packages/utils/tests/emoji-optional-variations-test.ts b/packages/utils/tests/emoji-optional-variations-test.ts index d513dfc..030d2a9 100644 --- a/packages/utils/tests/emoji-optional-variations-test.ts +++ b/packages/utils/tests/emoji-optional-variations-test.ts @@ -1,45 +1,8 @@ -import { readFile, writeFile, unlink } from 'node:fs/promises'; -import { emojiVersion } from '../lib/emoji/data'; import { getEmojiSequenceFromString } from '../lib/emoji/cleanup'; import { getEmojiSequenceString } from '../lib/emoji/format'; -import { parseEmojiTestFile } from '../lib/emoji/test/parse'; import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations'; describe('Qualified variations of emoji sequences', () => { - async function fetchEmojiTestData(): Promise { - // Fetch emojis, cache it - const source = `tests/fixtures/download-emoji-${emojiVersion}.txt`; - - let data: string | undefined; - try { - data = await readFile(source, 'utf8'); - } catch { - // - } - - if (!data) { - data = ( - await fetch( - `https://unicode.org/Public/emoji/${emojiVersion}/emoji-test.txt` - ) - ) - .text() - .toString(); - await writeFile(source, data, 'utf8'); - } - - // Test content, unlink cache on failure - if (data.indexOf(`# Version: ${emojiVersion}`) === -1) { - try { - await unlink(source); - } catch { - // - } - return; - } - return data; - } - it('Variations without test data', () => { const sequences = [ // simple emoji, twice to check duplicates @@ -90,82 +53,8 @@ describe('Qualified variations of emoji sequences', () => { '1F1E6 1F1F8', '1F3F4 E0067 E0062 E0065 E006E E0067 E007F', // mix of simple and complex, with and without variation - '1F9D7 1F3FE 200D 2640 FE0F', - '1F9D7 1F3FF 200D 2642 FE0F', - ]); - }); - - it('Variations with test data', async () => { - // Fetch emojis, cache it - const data = await fetchEmojiTestData(); - if (!data) { - console.warn('Test skipped: test data is not available'); - return; - } - const testData = parseEmojiTestFile(data); - - // Make sure testData keys contain only unqualified emojis - const testDataStrings = new Set(Object.keys(testData)); - expect(testDataStrings.has('1f600')).toBe(true); - expect(testDataStrings.has('263a')).toBe(true); - expect(testDataStrings.has('263a-fe0f')).toBe(false); - - // Make sure values contain qualified emojis - expect(testData['1f600'].sequence).toEqual([0x1f600]); - expect(testData['263a'].sequence).toEqual([0x263a, 0xfe0f]); - - // Sequences to test - const sequences = [ - // emoji without variation in test file - '1F601', - '1F635 200D 1F4AB', - // emojis without variations in test file, but variations in source - '1F60D FE0F', - // emoji that has variation in test file - '263A', - // keycap - '0030 20E3', - '0034 FE0F 20E3', - // complex emoji, exists in file - '1F9D1 1F3FE 200D 2764 200D 1F9D1 1F3FB', - // simple emoji, not in test file - '25F0', - // fake keycap, not in test file - '2345 20E3 200D 1235', - ].map((source) => { - const sequence = getEmojiSequenceFromString(source); - return { - source, - sequence, - }; - }); - const results = getQualifiedEmojiVariations(sequences, testData); - expect( - results.map((item) => - getEmojiSequenceString(item.sequence, { - separator: ' ', - case: 'upper', - format: 'utf-32', - add0: true, - }) - ) - ).toEqual([ - // emoji without variation in test file - '1F601', - '1F635 200D 1F4AB', - // emojis without variations in test file, but variations in source - '1F60D', - // emoji that has variation in test file - '263A FE0F', - // keycap - '0030 FE0F 20E3', - '0034 FE0F 20E3', - // complex emoji, exists in file - '1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FB', - // simple emoji, not in test file - '25F0 FE0F', - // fake keycap, not in test file - '2345 FE0F 20E3 200D 1235 FE0F', + '1F9D7 FE0F 1F3FE 200D 2640 FE0F', + '1F9D7 FE0F 1F3FF 200D 2642 FE0F', ]); }); }); diff --git a/packages/utils/tests/emoji-parse-test.ts b/packages/utils/tests/emoji-parse-test.ts index 5719c42..ef6112e 100644 --- a/packages/utils/tests/emoji-parse-test.ts +++ b/packages/utils/tests/emoji-parse-test.ts @@ -96,25 +96,7 @@ describe('Testing unicode test data', () => { return; } - // One emoji without variation - expect( - prepareEmojiForIconsList( - { - '2615': 'hot-beverage', - }, - data - ) - ).toEqual({ - icons: [ - { - icon: 'hot-beverage', - sequence: '2615', - }, - ], - regex: '\\u2615', - }); - - // One emoji with variation + // One emoji expect( prepareEmojiForIconsList( { @@ -169,7 +151,7 @@ describe('Testing unicode test data', () => { sequence: '270b-1f3ff', }, ], - regex: '\\u270B(?:\\uD83C[\\uDFFB-\\uDFFF])?', + regex: '\\u270B(?:\\uD83C[\\uDFFB-\\uDFFF]|\\uFE0F?)', }); // Multiple emojis, all without variations @@ -202,7 +184,7 @@ describe('Testing unicode test data', () => { sequence: '1f1e6-1f1ea', }, ], - regex: '\\uD83C\\uDDE6\\uD83C[\\uDDE8-\\uDDEA]|\\u2615', + regex: '\\uD83C\\uDDE6\\uD83C[\\uDDE8-\\uDDEA]|\\u2615\\uFE0F?', }); }); }); diff --git a/packages/utils/tests/emoji-regex-find-test.ts b/packages/utils/tests/emoji-regex-find-test.ts index 314ef98..5bccb10 100644 --- a/packages/utils/tests/emoji-regex-find-test.ts +++ b/packages/utils/tests/emoji-regex-find-test.ts @@ -389,7 +389,7 @@ describe('Finding emojis in text', () => { }); // Get all icons - const iconsList = getQualifiedEmojiVariations(sequences, testData); + const iconsList = getQualifiedEmojiVariations(sequences); // Get regex const regexValue = createOptimisedRegexForEmojiSequences(