2
0
mirror of https://github.com/iconify/iconify.git synced 2024-12-12 13:47:49 +00:00

feat: function to parse emoji test file

This commit is contained in:
Vjacheslav Trushkin 2022-12-03 20:14:15 +02:00
parent e186953d54
commit da4ddc5438
3 changed files with 91 additions and 74 deletions

View File

@ -122,6 +122,11 @@
"import": "./lib/emoji/format.mjs", "import": "./lib/emoji/format.mjs",
"types": "./lib/emoji/format.d.ts" "types": "./lib/emoji/format.d.ts"
}, },
"./lib/emoji/parse-test": {
"require": "./lib/emoji/parse-test.cjs",
"import": "./lib/emoji/parse-test.mjs",
"types": "./lib/emoji/parse-test.d.ts"
},
"./lib/icon-set/convert-info": { "./lib/icon-set/convert-info": {
"require": "./lib/icon-set/convert-info.cjs", "require": "./lib/icon-set/convert-info.cjs",
"import": "./lib/icon-set/convert-info.mjs", "import": "./lib/icon-set/convert-info.mjs",

View File

@ -0,0 +1,61 @@
import { getEmojiSequenceFromString } from './cleanup';
// Emoji types, listed once; both the union type and the lookup set are derived from this list
const emojiTypes = [
	'component',
	'fully-qualified',
	'minimally-qualified',
	'unqualified',
] as const;

// One of the values above
type EmojiType = (typeof emojiTypes)[number];

// Type used for components, first entry of the list
const componentType: EmojiType = emojiTypes[0];

// Allowed types, in order of conversion (Set keeps the order of the list above)
const allowedTypes: Set<EmojiType> = new Set(emojiTypes);
/**
 * Pattern for a cleaned up sequence: lower case hexadecimal characters and
 * dashes, starting and ending with a hexadecimal character.
 *
 * Declared once instead of being re-created for every line. It has no `g`
 * flag, so `test()` keeps no state between calls.
 */
const emojiCodeMatch = /^[a-f0-9]+[a-f0-9-]*[a-f0-9]+$/;

/**
 * Get all emoji sequences from test file
 *
 * A line is used only if it looks like `<codes> ; <type> # <comment>`.
 * The following lines are silently ignored:
 * - lines without `#`
 * - lines with nothing but whitespace before the first `#` (comments)
 * - lines where the part before `#` does not contain exactly one `;`
 * - lines where codes, after lower-casing and replacing whitespace with `-`,
 *   do not match `emojiCodeMatch`
 *
 * @param data - Contents of the emoji test file
 * @returns Unique sequences, in order of first appearance. Each sequence is
 *  the result of `getEmojiSequenceFromString()` for dash-separated
 *  hexadecimal codes, so result is a list of number arrays, not strings
 * @throws Error if a line with valid codes has a type that is not in
 *  `allowedTypes`
 */
export function parseEmojiTestFile(data: string): number[][] {
	// Set removes duplicate sequences and keeps insertion order
	const emojis: Set<string> = new Set();

	// Parse all lines
	for (const rawLine of data.split('\n')) {
		const line = rawLine.trim();

		// Everything after the first '#' is a comment; lines without it are not data
		const commentStart = line.indexOf('#');
		if (commentStart === -1) {
			continue;
		}

		// Get code and type from first chunk
		const firstChunk = line.slice(0, commentStart).trim();
		if (!firstChunk) {
			// Empty first chunk: a comment
			continue;
		}

		const firstChunkParts = firstChunk.split(';');
		if (firstChunkParts.length !== 2) {
			continue;
		}

		// Convert 'AAAA BBBB' to 'aaaa-bbbb'
		const text = firstChunkParts[0].trim();
		const code = text.toLowerCase().replace(/\s+/g, '-');
		if (!emojiCodeMatch.test(code)) {
			// Also covers empty code: pattern requires at least 2 characters
			continue;
		}

		// Type is validated only after code, so bad type on a line without
		// valid code does not throw
		const type = firstChunkParts[1].trim();
		if (!allowedTypes.has(type as EmojiType)) {
			throw new Error(`Bad emoji type: ${type}`);
		}

		// Add code
		emojis.add(code);
	}

	// Return all emojis as sequences. Callback is wrapped to make sure the
	// converter receives only the string, not index and array from map()
	return Array.from(emojis).map((code) => getEmojiSequenceFromString(code));
}

View File

@ -5,23 +5,9 @@ import {
startUTF32Pair1, startUTF32Pair1,
startUTF32Pair2, startUTF32Pair2,
endUTF32Pair, endUTF32Pair,
minUTF32,
} from '../lib/emoji/data'; } from '../lib/emoji/data';
import { parseEmojiTestFile } from '../lib/emoji/parse-test';
// Emoji types
type EmojiType =
| 'component'
| 'fully-qualified'
| 'minimally-qualified'
| 'unqualified';
const componentType: EmojiType = 'component';
// Allowed types, in order of conversion
const allowedTypes: Set<EmojiType> = new Set([
componentType,
'fully-qualified',
'minimally-qualified',
'unqualified',
]);
describe('Testing emoji code points', () => { describe('Testing emoji code points', () => {
it('Checking available ranges', async () => { it('Checking available ranges', async () => {
@ -58,49 +44,15 @@ describe('Testing emoji code points', () => {
} }
// Get all emojis // Get all emojis
const utf16: Set<string> = new Set(); const utf16: Set<number> = new Set();
const utf32: Set<string> = new Set(); const utf32: Set<number> = new Set();
data.split('\n').forEach((line) => {
line = line.trim();
const parts = line.split('#');
if (parts.length < 2) {
return;
}
// Get code and type from first chunk parseEmojiTestFile(data).forEach((sequence) => {
const firstChunk = (parts.shift() as string).trim(); sequence.forEach((code) => {
if (!firstChunk) { if (code < minUTF32) {
// Empty first chunk: a comment utf16.add(code);
return; } else {
} utf32.add(code);
const firstChunkParts = firstChunk.split(';');
if (firstChunkParts.length !== 2) {
return;
}
const text = firstChunkParts[0].trim();
const code = text.toLowerCase().replace(/\s+/g, '-');
if (!code) {
return;
}
const type = firstChunkParts[1].trim() as EmojiType;
if (!allowedTypes.has(type)) {
throw new Error(`Bad emoji type: ${type}`);
}
// Add code
code.split('-').forEach((chunk) => {
switch (chunk.length) {
case 2:
case 4:
utf16.add(chunk);
break;
case 5:
utf32.add(chunk);
break;
default:
throw new Error(`Bad emoji code: ${text}`);
} }
}); });
}); });
@ -129,10 +81,9 @@ describe('Testing emoji code points', () => {
// ... for UTF-16 code points // ... for UTF-16 code points
let utf16Range: Range | undefined; let utf16Range: Range | undefined;
utf16.forEach((str) => { utf16.forEach((code) => {
const code = getEmojiCodePoint(str);
if (code > startUTF32Pair1 && code < endUTF32Pair) { if (code > startUTF32Pair1 && code < endUTF32Pair) {
throw new Error(`UTF16 in UTF32 range: ${str}`); throw new Error(`UTF16 in UTF32 range: ${code}`);
} }
utf16Range = add(code, utf16Range); utf16Range = add(code, utf16Range);
}); });
@ -140,27 +91,18 @@ describe('Testing emoji code points', () => {
// ... for UTF-32 code points // ... for UTF-32 code points
let utf32FirstRange: Range | undefined; let utf32FirstRange: Range | undefined;
let utf32SecondRange: Range | undefined; let utf32SecondRange: Range | undefined;
utf32.forEach((str) => { utf32.forEach((code) => {
const pair = splitUTF32Number(getEmojiCodePoint(str)); const pair = splitUTF32Number(code);
if (pair) { if (pair) {
utf32FirstRange = add(pair[0], utf32FirstRange); utf32FirstRange = add(pair[0], utf32FirstRange);
utf32SecondRange = add(pair[1], utf32SecondRange); utf32SecondRange = add(pair[1], utf32SecondRange);
} else { } else {
throw new Error(`Unexpected item in UTF32 set: ${str}`); throw new Error(`Unexpected item in UTF32 set: ${code}`);
} }
}); });
// Check UTF-32 emoji ranges
expect(utf32FirstRange).toBeDefined();
expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1);
expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2);
expect(utf32SecondRange).toBeDefined();
expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2);
expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair);
// Dump ranges
/* /*
// Dump ranges
function dump(item: Range | undefined): string { function dump(item: Range | undefined): string {
if (!item) { if (!item) {
return 'undefined'; return 'undefined';
@ -172,5 +114,14 @@ describe('Testing emoji code points', () => {
console.log('UTF16:', dump(utf16Range)); console.log('UTF16:', dump(utf16Range));
console.log('UTF32:', dump(utf32FirstRange), dump(utf32SecondRange)); console.log('UTF32:', dump(utf32FirstRange), dump(utf32SecondRange));
*/ */
// Check UTF-32 emoji ranges
expect(utf32FirstRange).toBeDefined();
expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1);
expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2);
expect(utf32SecondRange).toBeDefined();
expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2);
expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair);
}); });
}); });