/**
 * Status values accepted in the second field of a data line
 */
type EmojiType =
	| 'component'
	| 'fully-qualified'
	| 'minimally-qualified'
	| 'unqualified';
const componentType: EmojiType = 'component';

// Allowed types, in order of conversion.
// Exposed as a read-only set of strings so raw text can be tested against it
// before it is narrowed to EmojiType.
const allowedTypes: ReadonlySet<string> = new Set<EmojiType>([
	componentType,
	'fully-qualified',
	'minimally-qualified',
	'unqualified',
]);

// Dash-separated lowercase hexadecimal code: starts and ends with a hex digit,
// at least two characters long. Hoisted so it is compiled once, not per line.
const codePattern = /^[a-f0-9]+[a-f0-9-]*[a-f0-9]+$/;

/**
 * Check that text from the type field is one of the allowed emoji types
 */
function isEmojiType(value: string): value is EmojiType {
	return allowedTypes.has(value);
}

/**
 * Get all emoji sequences from test file
 *
 * Only lines shaped like `<codes> ; <type> # <comment>` are used:
 * - lines without a `#` are ignored, as are lines with nothing before the
 *   first `#` (pure comments)
 * - the text before the first `#` must contain exactly one `;`
 * - the code field is lower-cased and its whitespace runs replaced with `-`;
 *   lines where the result is not a dash-separated hexadecimal code are ignored
 *
 * Duplicate codes are collapsed; results keep the order of first appearance.
 *
 * @param data Content of the emoji test file
 * @returns One entry per unique code, each entry being whatever
 *  `getEmojiSequenceFromString` (imported from './cleanup') returns for the
 *  dash-separated code string
 * @throws Error `Bad emoji type: <type>` when a line has a valid code but
 *  its type is not in the allowed list
 */
export function parseEmojiTestFile(data: string): number[][] {
	const emojis = new Set<string>();

	for (const rawLine of data.split('\n')) {
		// Everything from the first '#' onwards is a comment
		const commentStart = rawLine.indexOf('#');
		if (commentStart === -1) {
			continue;
		}

		// Get code and type from the chunk that precedes the comment
		const definition = rawLine.slice(0, commentStart).trim();
		if (!definition) {
			// Nothing before the comment
			continue;
		}
		const fields = definition.split(';');
		if (fields.length !== 2) {
			continue;
		}

		const code = fields[0].trim().toLowerCase().replace(/\s+/g, '-');
		if (!codePattern.test(code)) {
			continue;
		}

		// Type is validated only after the code, so malformed codes are
		// skipped silently instead of raising an error about their type
		const type = fields[1].trim();
		if (!isEmojiType(type)) {
			throw new Error(`Bad emoji type: ${type}`);
		}

		// Add code
		emojis.add(code);
	}

	// Return all emojis as sequences. The explicit arrow function keeps the
	// map index/array arguments away from the helper.
	return Array.from(emojis, (code) => getEmojiSequenceFromString(code));
}
string).trim(); - if (!firstChunk) { - // Empty first chunk: a comment - return; - } - const firstChunkParts = firstChunk.split(';'); - if (firstChunkParts.length !== 2) { - return; - } - const text = firstChunkParts[0].trim(); - const code = text.toLowerCase().replace(/\s+/g, '-'); - if (!code) { - return; - } - const type = firstChunkParts[1].trim() as EmojiType; - if (!allowedTypes.has(type)) { - throw new Error(`Bad emoji type: ${type}`); - } - - // Add code - code.split('-').forEach((chunk) => { - switch (chunk.length) { - case 2: - case 4: - utf16.add(chunk); - break; - - case 5: - utf32.add(chunk); - break; - - default: - throw new Error(`Bad emoji code: ${text}`); + parseEmojiTestFile(data).forEach((sequence) => { + sequence.forEach((code) => { + if (code < minUTF32) { + utf16.add(code); + } else { + utf32.add(code); } }); }); @@ -129,10 +81,9 @@ describe('Testing emoji code points', () => { // ... for UTF-16 code points let utf16Range: Range | undefined; - utf16.forEach((str) => { - const code = getEmojiCodePoint(str); + utf16.forEach((code) => { if (code > startUTF32Pair1 && code < endUTF32Pair) { - throw new Error(`UTF16 in UTF32 range: ${str}`); + throw new Error(`UTF16 in UTF32 range: ${code}`); } utf16Range = add(code, utf16Range); }); @@ -140,27 +91,18 @@ describe('Testing emoji code points', () => { // ... 
for UTF-32 code points let utf32FirstRange: Range | undefined; let utf32SecondRange: Range | undefined; - utf32.forEach((str) => { - const pair = splitUTF32Number(getEmojiCodePoint(str)); + utf32.forEach((code) => { + const pair = splitUTF32Number(code); if (pair) { utf32FirstRange = add(pair[0], utf32FirstRange); utf32SecondRange = add(pair[1], utf32SecondRange); } else { - throw new Error(`Unexpected item in UTF32 set: ${str}`); + throw new Error(`Unexpected item in UTF32 set: ${code}`); } }); - // Check UTF-32 emoji ranges - expect(utf32FirstRange).toBeDefined(); - expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1); - expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2); - - expect(utf32SecondRange).toBeDefined(); - expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2); - expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair); - - // Dump ranges /* + // Dump ranges function dump(item: Range | undefined): string { if (!item) { return 'undefined'; @@ -172,5 +114,14 @@ describe('Testing emoji code points', () => { console.log('UTF16:', dump(utf16Range)); console.log('UTF32:', dump(utf32FirstRange), dump(utf32SecondRange)); */ + + // Check UTF-32 emoji ranges + expect(utf32FirstRange).toBeDefined(); + expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1); + expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2); + + expect(utf32SecondRange).toBeDefined(); + expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2); + expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair); }); });