2
0
mirror of https://github.com/iconify/iconify.git synced 2024-11-09 23:00:56 +00:00

fix(utils): get full data from test file, fix unit test for searching emojis

This commit is contained in:
Vjacheslav Trushkin 2022-12-14 23:37:22 +02:00
parent 9f5be0f10d
commit 70758ebfa6
4 changed files with 131 additions and 42 deletions

View File

@ -1,31 +1,59 @@
import { getEmojiSequenceFromString } from './cleanup';
import { convertEmojiSequenceToUTF32 } from './convert';
import { getEmojiSequenceString } from './format';
import { getUnqualifiedEmojiSequence } from './variations';
// Emoji types
type EmojiType =
type EmojiStatus =
| 'component'
| 'fully-qualified'
| 'minimally-qualified'
| 'unqualified';
const componentType: EmojiType = 'component';
const componentStatus: EmojiStatus = 'component';
// Allowed types, in order of conversion
const allowedTypes: Set<EmojiType> = new Set([
componentType,
// Allowed status values, in order of conversion
const allowedStatus: Set<EmojiStatus> = new Set([
componentStatus,
'fully-qualified',
'minimally-qualified',
'unqualified',
]);
/**
* Test data item
*
* One entry per emoji line of the test file, as produced by parseEmojiTestFile()
*/
export interface EmojiTestDataItem {
// Group and subgroup: values of the most recent `group:` and `subgroup:`
// comment lines that precede the emoji line in the test file
group: string;
subgroup: string;
// Code points as string, lower case, dash separated (hexadecimal, such as '263a-fe0f')
code: string;
// Code points as numbers, UTF-32
sequence: number[];
// Emoji string: first whitespace separated chunk of the comment part of the line
emoji: string;
// Status: 'component', 'fully-qualified', 'minimally-qualified' or 'unqualified'
status: EmojiStatus;
// Version when emoji was added: second chunk of the comment part, starts with 'E'
version: string;
// Emoji name: remaining chunks of the comment part, joined with a single space
name: string;
}
/**
* Get all emojis from test file
*
* Returns one item per emoji line: group, subgroup, code, sequence, emoji, status, version and name
*/
export function parseEmojiTestFile(data: string): number[][] {
const emojis: Set<string> = new Set();
export function parseEmojiTestFile(data: string): EmojiTestDataItem[] {
const results: EmojiTestDataItem[] = [];
let group: string | undefined;
let subgroup: string | undefined;
// Parse all lines
data.split('\n').forEach((line) => {
@ -37,32 +65,81 @@ export function parseEmojiTestFile(data: string): number[][] {
// Get code and type from first chunk
const firstChunk = (parts.shift() as string).trim();
const secondChunk = parts.join('#').trim();
if (!firstChunk) {
// Empty first chunk: a comment
const commentParts = secondChunk.split(':');
if (commentParts.length === 2) {
const key = commentParts[0].trim();
const value = commentParts[1].trim();
switch (key) {
case 'group':
group = value;
subgroup = void 0;
break;
case 'subgroup':
subgroup = value;
break;
}
}
return;
}
if (!group || !subgroup) {
// Cannot parse emojis until group and subgroup are set
return;
}
// Possible emoji line
const firstChunkParts = firstChunk.split(';');
if (firstChunkParts.length !== 2) {
return;
}
const text = firstChunkParts[0].trim();
const code = text.toLowerCase().replace(/\s+/g, '-');
const code = firstChunkParts[0]
.trim()
.replace(/\s+/g, '-')
.toLowerCase();
if (!code || !code.match(/^[a-f0-9]+[a-f0-9-]*[a-f0-9]+$/)) {
return;
}
const type = firstChunkParts[1].trim() as EmojiType;
if (!allowedTypes.has(type)) {
throw new Error(`Bad emoji type: ${type}`);
const status = firstChunkParts[1].trim() as EmojiStatus;
if (!allowedStatus.has(status)) {
throw new Error(`Bad emoji type: ${status}`);
}
// Add code
emojis.add(code);
// Parse second chunk
const secondChunkParts = secondChunk.split(/\s+/);
if (secondChunkParts.length < 3) {
throw new Error(`Bad emoji comment for: ${code}`);
}
// Comment stuff
const emoji = secondChunkParts.shift() as string;
const version = secondChunkParts.shift() as string;
if (version.slice(0, 1) !== 'E') {
throw new Error(`Bad unicode version "${version}" for: ${code}`);
}
const name = secondChunkParts.join(' ');
// Add item
results.push({
group,
subgroup,
code,
sequence: getEmojiSequenceFromString(code),
emoji,
status,
version,
name,
});
});
// Return all emojis as sequences, converted to UTF-32
return Array.from(emojis).map((item) =>
convertEmojiSequenceToUTF32(getEmojiSequenceFromString(item))
);
return results;
}
/**

View File

@ -89,18 +89,17 @@ describe('Optional variations of emoji sequences', () => {
return;
}
const testData = parseEmojiTestFile(data);
const testDataSequences = testData.map((item) => item.sequence);
// Make sure testData contains both fully-qualified and unqualified emojis
const testDataStrings = new Set(
testData.map((sequence) => getEmojiSequenceString(sequence))
);
const testDataStrings = new Set(testData.map((item) => item.code));
expect(testDataStrings.has('1f600')).toBe(true);
expect(testDataStrings.has('263a')).toBe(true);
expect(testDataStrings.has('263a-fe0f')).toBe(true);
// Test getQualifiedEmojiSequencesMap
const unqualifiedTest = getQualifiedEmojiSequencesMap(
testData,
testDataSequences,
getEmojiSequenceString
);
expect(unqualifiedTest['1f600']).toBe('1f600');
@ -125,7 +124,10 @@ describe('Optional variations of emoji sequences', () => {
// fake keycap, not in test file
'2345 20E3 200D 1235',
].map(getEmojiSequenceFromString);
const results = addOptionalEmojiVariations(sequences, testData);
const results = addOptionalEmojiVariations(
sequences,
testDataSequences
);
expect(
results.map((sequence) =>
getEmojiSequenceString(sequence, {

View File

@ -68,37 +68,47 @@ describe('Finding emojis in text', () => {
expect(sortedMatches).toEqual([
// Same order as in content
{
match: emoji1,
sequence: [0x1f600],
keyword: '1f600',
match: {
match: emoji1,
sequence: [0x1f600],
keyword: '1f600',
},
prev: text1,
next: text2,
},
{
match: emoji2,
sequence: [0x1f603],
keyword: '1f603',
match: {
match: emoji2,
sequence: [0x1f603],
keyword: '1f603',
},
prev: text2,
next: text3,
},
{
match: emoji3,
sequence: [0x1f600],
keyword: '1f600',
match: {
match: emoji3,
sequence: [0x1f600],
keyword: '1f600',
},
prev: text3,
next: text4,
},
{
match: emoji4,
sequence: [0x263a],
keyword: '263a',
match: {
match: emoji4,
sequence: [0x263a],
keyword: '263a',
},
prev: text4,
next: text5,
},
{
match: emoji5,
sequence: [0x263a],
keyword: '263a',
match: {
match: emoji5,
sequence: [0x263a],
keyword: '263a',
},
prev: text5,
next: text6,
},

View File

@ -1,6 +1,6 @@
/* eslint-disable @typescript-eslint/no-non-null-assertion */
import { readFile, writeFile, unlink } from 'node:fs/promises';
import { getEmojiCodePoint, splitUTF32Number } from '../lib/emoji/convert';
import { splitUTF32Number } from '../lib/emoji/convert';
import {
startUTF32Pair1,
startUTF32Pair2,
@ -47,8 +47,8 @@ describe('Testing emoji code points', () => {
const utf16: Set<number> = new Set();
const utf32: Set<number> = new Set();
parseEmojiTestFile(data).forEach((sequence) => {
sequence.forEach((code) => {
parseEmojiTestFile(data).forEach((item) => {
item.sequence.forEach((code) => {
if (code < minUTF32) {
utf16.add(code);
} else {