feat: function to parse emoji test file

2024-12-12 13:47:49 +00:00 · 2022-12-03 20:14:15 +02:00 · 2022-12-03 20:14:15 +02:00 · da4ddc5438
commit da4ddc5438
parent e186953d54
3 changed files with 91 additions and 74 deletions
--- a/packages/utils/package.json
+++ b/packages/utils/package.json
@ -122,6 +122,11 @@
 			"import": "./lib/emoji/format.mjs",
 			"types": "./lib/emoji/format.d.ts"
 		},
 		"./lib/emoji/parse-test": {
 			"require": "./lib/emoji/parse-test.cjs",
 			"import": "./lib/emoji/parse-test.mjs",
 			"types": "./lib/emoji/parse-test.d.ts"
 		},
 		"./lib/icon-set/convert-info": {
 			"require": "./lib/icon-set/convert-info.cjs",
 			"import": "./lib/icon-set/convert-info.mjs",
--- a/packages/utils/src/emoji/parse-test.ts
+++ b/packages/utils/src/emoji/parse-test.ts
@ -0,0 +1,61 @@
 import { getEmojiSequenceFromString } from './cleanup';
 // All emoji types accepted by the parser, in order of conversion
 const emojiTypes = [
 	'component',
 	'fully-qualified',
 	'minimally-qualified',
 	'unqualified',
 ] as const;
 // Emoji types: union of the literals listed above
 type EmojiType = (typeof emojiTypes)[number];
 const componentType: EmojiType = emojiTypes[0];
 // Allowed types as a set, used for membership checks while parsing
 const allowedTypes: Set<EmojiType> = new Set(emojiTypes);
 /**
 * Get all unique emoji sequences from the text of an emoji test file
 *
 * Each useful line is expected to look like `<hex codes> ; <type> # <comment>`.
 * A line is silently skipped when it:
 * - contains no `#`,
 * - has nothing before the first `#` (a comment line),
 * - does not have exactly two `;`-separated fields before the first `#`,
 * - has a code field that, once lower-cased and joined with dashes, is not
 *   made of hexadecimal digits and dashes (at least two characters, starting
 *   and ending with a hexadecimal digit).
 *
 * The type field is validated only for lines that passed the checks above.
 *
 * @param data Full text of the test file, lines separated by `\n`
 * @returns One numeric sequence per unique code string, in order of first
 * appearance. Each dash-separated code string is converted with
 * `getEmojiSequenceFromString` (presumably to a list of code points - see
 * `./cleanup` to confirm).
 * @throws Error when a line with a valid code has a type that is not in
 * `allowedTypes`
 */
 export function parseEmojiTestFile(data: string): number[][] {
 	// Built once instead of once per line
 	const codePattern = /^[a-f0-9]+[a-f0-9-]*[a-f0-9]+$/;
 	// Dash-separated lower-case codes; Set removes duplicates and keeps order
 	const emojis = new Set<string>();
 	// Parse all lines
 	for (const rawLine of data.split('\n')) {
 		const line = rawLine.trim();
 		// Everything after the first '#' is a comment
 		const commentStart = line.indexOf('#');
 		if (commentStart === -1) {
 			continue;
 		}
 		// Get code and type from the chunk before the comment
 		const firstChunk = line.slice(0, commentStart).trim();
 		if (!firstChunk) {
 			// Empty first chunk: a comment
 			continue;
 		}
 		const fields = firstChunk.split(';');
 		if (fields.length !== 2) {
 			continue;
 		}
 		// 'AAAA BBBB' -> 'aaaa-bbbb'
 		const code = fields[0].trim().toLowerCase().replace(/\s+/g, '-');
 		// Pattern needs at least two characters, so it also rejects empty code
 		if (!codePattern.test(code)) {
 			continue;
 		}
 		const type = fields[1].trim();
 		if (!allowedTypes.has(type as EmojiType)) {
 			throw new Error(`Bad emoji type: ${type}`);
 		}
 		// Add code
 		emojis.add(code);
 	}
 	// Return all emojis as sequences
 	return Array.from(emojis).map(getEmojiSequenceFromString);
 }
--- a/packages/utils/tests/validate-emoji-unicode-test.ts
+++ b/packages/utils/tests/validate-emoji-unicode-test.ts
@ -5,23 +5,9 @@ import {
 	startUTF32Pair1,
 	startUTF32Pair2,
 	endUTF32Pair,
 	minUTF32,
 } from '../lib/emoji/data';
-
+import { parseEmojiTestFile } from '../lib/emoji/parse-test';
 // Emoji types
 type EmojiType =
 	| 'component'
 	| 'fully-qualified'
 	| 'minimally-qualified'
 	| 'unqualified';
 const componentType: EmojiType = 'component';
 // Allowed types, in order of conversion
 const allowedTypes: Set<EmojiType> = new Set([
 	componentType,
 	'fully-qualified',
 	'minimally-qualified',
 	'unqualified',
 ]);
 describe('Testing emoji code points', () => {
 	it('Checking available ranges', async () => {
@ -58,49 +44,15 @@ describe('Testing emoji code points', () => {
 		}
 		// Get all emojis
-		const utf16: Set<string> = new Set();
+		const utf16: Set<number> = new Set();
-		const utf32: Set<string> = new Set();
+		const utf32: Set<number> = new Set();
 		data.split('\n').forEach((line) => {
 			line = line.trim();
 			const parts = line.split('#');
 			if (parts.length < 2) {
 				return;
 			}
-			// Get code and type from first chunk
+		parseEmojiTestFile(data).forEach((sequence) => {
-			const firstChunk = (parts.shift() as string).trim();
+			sequence.forEach((code) => {
-			if (!firstChunk) {
+				if (code < minUTF32) {
-				// Empty first chunk: a comment
+					utf16.add(code);
-				return;
+				} else {
-			}
+					utf32.add(code);
 			const firstChunkParts = firstChunk.split(';');
 			if (firstChunkParts.length !== 2) {
 				return;
 			}
 			const text = firstChunkParts[0].trim();
 			const code = text.toLowerCase().replace(/\s+/g, '-');
 			if (!code) {
 				return;
 			}
 			const type = firstChunkParts[1].trim() as EmojiType;
 			if (!allowedTypes.has(type)) {
 				throw new Error(`Bad emoji type: ${type}`);
 			}
 			// Add code
 			code.split('-').forEach((chunk) => {
 				switch (chunk.length) {
 					case 2:
 					case 4:
 						utf16.add(chunk);
 						break;
 					case 5:
 						utf32.add(chunk);
 						break;
 					default:
 						throw new Error(`Bad emoji code: ${text}`);
 				}
 			});
 		});
@ -129,10 +81,9 @@ describe('Testing emoji code points', () => {
 		// ... for UTF-16 code points
 		let utf16Range: Range | undefined;
-		utf16.forEach((str) => {
+		utf16.forEach((code) => {
 			const code = getEmojiCodePoint(str);
 			if (code > startUTF32Pair1 && code < endUTF32Pair) {
-				throw new Error(`UTF16 in UTF32 range: ${str}`);
+				throw new Error(`UTF16 in UTF32 range: ${code}`);
 			}
 			utf16Range = add(code, utf16Range);
 		});
@ -140,27 +91,18 @@ describe('Testing emoji code points', () => {
 		// ... for UTF-32 code points
 		let utf32FirstRange: Range | undefined;
 		let utf32SecondRange: Range | undefined;
-		utf32.forEach((str) => {
+		utf32.forEach((code) => {
-			const pair = splitUTF32Number(getEmojiCodePoint(str));
+			const pair = splitUTF32Number(code);
 			if (pair) {
 				utf32FirstRange = add(pair[0], utf32FirstRange);
 				utf32SecondRange = add(pair[1], utf32SecondRange);
 			} else {
-				throw new Error(`Unexpected item in UTF32 set: ${str}`);
+				throw new Error(`Unexpected item in UTF32 set: ${code}`);
 			}
 		});
 		// Check UTF-32 emoji ranges
 		expect(utf32FirstRange).toBeDefined();
 		expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1);
 		expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2);
 		expect(utf32SecondRange).toBeDefined();
 		expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2);
 		expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair);
 		// Dump ranges
 		/*
 		// Dump ranges
 		function dump(item: Range | undefined): string {
 			if (!item) {
 				return 'undefined';
@ -172,5 +114,14 @@ describe('Testing emoji code points', () => {
 		console.log('UTF16:', dump(utf16Range));
 		console.log('UTF32:', dump(utf32FirstRange), dump(utf32SecondRange));
 		*/
 		// Check UTF-32 emoji ranges
 		expect(utf32FirstRange).toBeDefined();
 		expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1);
 		expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2);
 		expect(utf32SecondRange).toBeDefined();
 		expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2);
 		expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair);
 	});
 });