diff --git a/packages/utils/src/emoji/parse-test.ts b/packages/utils/src/emoji/parse-test.ts index b971b13..20fa9c4 100644 --- a/packages/utils/src/emoji/parse-test.ts +++ b/packages/utils/src/emoji/parse-test.ts @@ -1,31 +1,59 @@ import { getEmojiSequenceFromString } from './cleanup'; -import { convertEmojiSequenceToUTF32 } from './convert'; import { getEmojiSequenceString } from './format'; import { getUnqualifiedEmojiSequence } from './variations'; // Emoji types -type EmojiType = +type EmojiStatus = | 'component' | 'fully-qualified' | 'minimally-qualified' | 'unqualified'; -const componentType: EmojiType = 'component'; +const componentStatus: EmojiStatus = 'component'; -// Allowed types, in order of conversion -const allowedTypes: Set = new Set([ - componentType, +// Allowed status values, in order of conversion +const allowedStatus: Set = new Set([ + componentStatus, 'fully-qualified', 'minimally-qualified', 'unqualified', ]); +/** + * Test data item + */ +export interface EmojiTestDataItem { + // Group and subgroup + group: string; + subgroup: string; + + // Code points as string, lower case, dash separated + code: string; + + // Code points as numbers, UTF-32 + sequence: number[]; + + // Emoji string + emoji: string; + + // Status + status: EmojiStatus; + + // Version when emoji was added + version: string; + + // Emoji name + name: string; +} + /** * Get all emoji sequences from test file * * Returns all emojis as UTF-32 sequences */ -export function parseEmojiTestFile(data: string): number[][] { - const emojis: Set = new Set(); +export function parseEmojiTestFile(data: string): EmojiTestDataItem[] { + const results: EmojiTestDataItem[] = []; + let group: string | undefined; + let subgroup: string | undefined; // Parse all lines data.split('\n').forEach((line) => { @@ -37,32 +65,81 @@ export function parseEmojiTestFile(data: string): number[][] { // Get code and type from first chunk const firstChunk = (parts.shift() as string).trim(); + const secondChunk = parts.join('#').trim(); if (!firstChunk) { // Empty first chunk: a comment + const commentParts = secondChunk.split(':'); + if (commentParts.length === 2) { + const key = commentParts[0].trim(); + const value = commentParts[1].trim(); + + switch (key) { + case 'group': + group = value; + subgroup = void 0; + break; + + case 'subgroup': + subgroup = value; + break; + } + } + return; } + + if (!group || !subgroup) { + // Cannot parse emojis until group and subgroup are set + return; + } + + // Possible emoji line const firstChunkParts = firstChunk.split(';'); if (firstChunkParts.length !== 2) { return; } - const text = firstChunkParts[0].trim(); - const code = text.toLowerCase().replace(/\s+/g, '-'); + + const code = firstChunkParts[0] + .trim() + .replace(/\s+/g, '-') + .toLowerCase(); if (!code || !code.match(/^[a-f0-9]+[a-f0-9-]*[a-f0-9]+$/)) { return; } - const type = firstChunkParts[1].trim() as EmojiType; - if (!allowedTypes.has(type)) { - throw new Error(`Bad emoji type: ${type}`); + + const status = firstChunkParts[1].trim() as EmojiStatus; + if (!allowedStatus.has(status)) { + throw new Error(`Bad emoji type: ${status}`); } - // Add code - emojis.add(code); + // Parse second chunk + const secondChunkParts = secondChunk.split(/\s+/); + if (secondChunkParts.length < 3) { + throw new Error(`Bad emoji comment for: ${code}`); + } + + // Comment stuff + const emoji = secondChunkParts.shift() as string; + const version = secondChunkParts.shift() as string; + if (version.slice(0, 1) !== 'E') { + throw new Error(`Bad unicode version "${version}" for: ${code}`); + } + const name = secondChunkParts.join(' '); + + // Add item + results.push({ + group, + subgroup, + code, + sequence: getEmojiSequenceFromString(code), + emoji, + status, + version, + name, + }); }); - // Return all emojis as sequences, converted to UTF-32 - return Array.from(emojis).map((item) => - convertEmojiSequenceToUTF32(getEmojiSequenceFromString(item)) - ); + return results; } /** diff --git a/packages/utils/tests/emoji-optional-variations-test.ts b/packages/utils/tests/emoji-optional-variations-test.ts index 85a871a..b8b5686 100644 --- a/packages/utils/tests/emoji-optional-variations-test.ts +++ b/packages/utils/tests/emoji-optional-variations-test.ts @@ -89,18 +89,17 @@ describe('Optional variations of emoji sequences', () => { return; } const testData = parseEmojiTestFile(data); + const testDataSequences = testData.map((item) => item.sequence); // Make sure testData contains both fully-qualified and unqualified emojis - const testDataStrings = new Set( - testData.map((sequence) => getEmojiSequenceString(sequence)) - ); + const testDataStrings = new Set(testData.map((item) => item.code)); expect(testDataStrings.has('1f600')).toBe(true); expect(testDataStrings.has('263a')).toBe(true); expect(testDataStrings.has('263a-fe0f')).toBe(true); // Test getQualifiedEmojiSequencesMap const unqualifiedTest = getQualifiedEmojiSequencesMap( - testData, + testDataSequences, getEmojiSequenceString ); expect(unqualifiedTest['1f600']).toBe('1f600'); @@ -125,7 +124,10 @@ describe('Optional variations of emoji sequences', () => { // fake keycap, not in test file '2345 20E3 200D 1235', ].map(getEmojiSequenceFromString); - const results = addOptionalEmojiVariations(sequences, testData); + const results = addOptionalEmojiVariations( + sequences, + testDataSequences + ); expect( results.map((sequence) => getEmojiSequenceString(sequence, { diff --git a/packages/utils/tests/emoji-regex-find-test.ts b/packages/utils/tests/emoji-regex-find-test.ts index fb0a78b..3abc3e7 100644 --- a/packages/utils/tests/emoji-regex-find-test.ts +++ b/packages/utils/tests/emoji-regex-find-test.ts @@ -68,37 +68,47 @@ describe('Finding emojis in text', () => { expect(sortedMatches).toEqual([ // Same order as in content { - match: emoji1, - sequence: [0x1f600], - keyword: '1f600', + match: { + match: emoji1, + sequence: [0x1f600], + keyword: '1f600', + }, prev: text1, next: text2, }, { - match: emoji2, - sequence: [0x1f603], - keyword: '1f603', + match: { + match: emoji2, + sequence: [0x1f603], + keyword: '1f603', + }, prev: text2, next: text3, }, { - match: emoji3, - sequence: [0x1f600], - keyword: '1f600', + match: { + match: emoji3, + sequence: [0x1f600], + keyword: '1f600', + }, prev: text3, next: text4, }, { - match: emoji4, - sequence: [0x263a], - keyword: '263a', + match: { + match: emoji4, + sequence: [0x263a], + keyword: '263a', + }, prev: text4, next: text5, }, { - match: emoji5, - sequence: [0x263a], - keyword: '263a', + match: { + match: emoji5, + sequence: [0x263a], + keyword: '263a', + }, prev: text5, next: text6, }, diff --git a/packages/utils/tests/validate-emoji-unicode-test.ts b/packages/utils/tests/validate-emoji-unicode-test.ts index a5bff24..7c5e370 100644 --- a/packages/utils/tests/validate-emoji-unicode-test.ts +++ b/packages/utils/tests/validate-emoji-unicode-test.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { readFile, writeFile, unlink } from 'node:fs/promises'; -import { getEmojiCodePoint, splitUTF32Number } from '../lib/emoji/convert'; +import { splitUTF32Number } from '../lib/emoji/convert'; import { startUTF32Pair1, startUTF32Pair2, @@ -47,8 +47,8 @@ describe('Testing emoji code points', () => { const utf16: Set = new Set(); const utf32: Set = new Set(); - parseEmojiTestFile(data).forEach((sequence) => { - sequence.forEach((code) => { + parseEmojiTestFile(data).forEach((item) => { + item.sequence.forEach((code) => { if (code < minUTF32) { utf16.add(code); } else {