2
0
mirror of https://github.com/iconify/iconify.git synced 2025-01-06 07:20:40 +00:00

chore(utils): restructure emoji code to handle sequences with custom properties, fix errors

This commit is contained in:
Vjacheslav Trushkin 2022-12-24 20:44:29 +02:00
parent 85c12955c0
commit e5dbd00cba
15 changed files with 1336 additions and 1208 deletions

View File

@ -142,10 +142,10 @@
"import": "./lib/emoji/test/components.mjs",
"types": "./lib/emoji/test/components.d.ts"
},
"./lib/emoji/test/copy": {
"require": "./lib/emoji/test/copy.cjs",
"import": "./lib/emoji/test/copy.mjs",
"types": "./lib/emoji/test/copy.d.ts"
"./lib/emoji/test/missing": {
"require": "./lib/emoji/test/missing.cjs",
"import": "./lib/emoji/test/missing.mjs",
"types": "./lib/emoji/test/missing.d.ts"
},
"./lib/emoji/test/name": {
"require": "./lib/emoji/test/name.cjs",
@ -157,6 +157,16 @@
"import": "./lib/emoji/test/parse.mjs",
"types": "./lib/emoji/test/parse.d.ts"
},
"./lib/emoji/test/tree": {
"require": "./lib/emoji/test/tree.cjs",
"import": "./lib/emoji/test/tree.mjs",
"types": "./lib/emoji/test/tree.d.ts"
},
"./lib/emoji/test/similar": {
"require": "./lib/emoji/test/similar.cjs",
"import": "./lib/emoji/test/similar.mjs",
"types": "./lib/emoji/test/similar.d.ts"
},
"./lib/emoji/test/variations": {
"require": "./lib/emoji/test/variations.cjs",
"import": "./lib/emoji/test/variations.mjs",

View File

@ -92,13 +92,6 @@ export function joinEmojiSequences(
return results;
}
/**
 * Strip variation selectors from sequence
 *
 * Returns new array that contains every entry of `sequence`, except
 * entries equal to `vs16Emoji`. Source array is not modified.
 */
export function removeEmojiVariations(sequence: number[]): number[] {
	const cleaned: number[] = [];
	for (const code of sequence) {
		if (code !== vs16Emoji) {
			cleaned.push(code);
		}
	}
	return cleaned;
}
/**
* Get unqualified sequence
*/

View File

@ -1,5 +1,6 @@
import { getSequenceFromEmojiStringOrKeyword } from '../cleanup';
import { convertEmojiSequenceToUTF32 } from '../convert';
import type { EmojiTestData } from '../test/parse';
import { getQualifiedEmojiVariations } from '../test/variations';
import { createEmojisTree, parseEmojiTree } from './tree';
@ -38,7 +39,7 @@ export function createOptimisedRegexForEmojiSequences(
*/
export function createOptimisedRegex(
emojis: (string | number[])[],
testData?: number[][]
testData?: EmojiTestData
): string {
// Convert to numbers
let sequences = emojis.map((item) =>
@ -48,7 +49,15 @@ export function createOptimisedRegex(
);
// Add variations
sequences = getQualifiedEmojiVariations(sequences, testData);
// Temporary convert to object with 'sequence' property
sequences = getQualifiedEmojiVariations(
sequences.map((sequence) => {
return {
sequence,
};
}),
testData
).map((item) => item.sequence);
// Parse
return createOptimisedRegexForEmojiSequences(sequences);

View File

@ -1,5 +1,6 @@
import { emojiComponents, EmojiComponentType } from '../data';
import type { EmojiSequenceToStringCallback, EmojiTestDataItem } from './parse';
import { getEmojiSequenceKeyword } from '../format';
import type { EmojiTestData, EmojiTestDataItem } from './parse';
export interface EmojiTestDataComponentsMap {
// Keywords
@ -20,8 +21,7 @@ export interface EmojiTestDataComponentsMap {
* Map components from test data
*/
export function mapEmojiTestDataComponents(
testSequences: Record<string, EmojiTestDataItem>,
convert: EmojiSequenceToStringCallback
testSequences: EmojiTestData
): EmojiTestDataComponentsMap {
const results: EmojiTestDataComponentsMap = {
converted: new Map(),
@ -35,7 +35,7 @@ export function mapEmojiTestDataComponents(
const type = key as EmojiComponentType;
const range = emojiComponents[type];
for (let number = range[0]; number <= range[1]; number++) {
const keyword = convert([number]);
const keyword = getEmojiSequenceKeyword([number]);
const item = testSequences[keyword];
if (!item) {
throw new Error(
@ -57,3 +57,87 @@ export function mapEmojiTestDataComponents(
return results;
}
/**
* Sequence with components
*/
export type EmojiSequenceWithComponents = (EmojiComponentType | number)[];
/**
 * Stringify sequence that can contain component placeholders
 *
 * Numbers are converted to hexadecimal strings, component types are kept
 * as is. All parts are joined with '-'.
 */
export function emojiSequenceWithComponentsToString(
	sequence: EmojiSequenceWithComponents
): string {
	const parts: string[] = [];
	for (let i = 0; i < sequence.length; i++) {
		const entry = sequence[i];
		parts.push(typeof entry === 'number' ? entry.toString(16) : entry);
	}
	return parts.join('-');
}
/**
* Entry in sequence
*/
export interface EmojiSequenceComponentEntry {
// Index in sequence
index: number;
// Component type
type: EmojiComponentType;
}
/**
 * Locate components in sequence
 *
 * Every number in sequence is tested against ranges from `emojiComponents`
 * (start of range is inclusive, end of range is exclusive). For each match,
 * index in sequence and component type are added to result. Only first
 * matching component type is used for each number.
 */
export function findEmojiComponentsInSequence(
	sequence: number[]
): EmojiSequenceComponentEntry[] {
	const found: EmojiSequenceComponentEntry[] = [];

	sequence.forEach((code, index) => {
		for (const key in emojiComponents) {
			const type = key as EmojiComponentType;
			const range = emojiComponents[type];
			if (code >= range[0] && code < range[1]) {
				found.push({ index, type });
				// Stop at first matching type
				return;
			}
		}
	});

	return found;
}
/**
* Component values
*/
export type EmojiSequenceComponentValues = Partial<
Record<EmojiComponentType, number[]>
>;
/**
 * Swap component placeholders in sequence for numbers
 *
 * For each component type, n-th placeholder is replaced with n-th entry
 * from `values`. If sequence has more placeholders than there are values,
 * last value is reused.
 *
 * Throws an error if placeholder has no values to choose from.
 */
export function replaceEmojiComponentsInCombinedSequence(
	sequence: EmojiSequenceWithComponents,
	values: EmojiSequenceComponentValues
): number[] {
	// Number of placeholders replaced so far, for each type
	const used: Record<EmojiComponentType, number> = {
		'hair-style': 0,
		'skin-tone': 0,
	};

	const result: number[] = [];
	for (const entry of sequence) {
		if (typeof entry === 'number') {
			// Not a placeholder
			result.push(entry);
			continue;
		}

		const position = used[entry]++;
		const available = values[entry];
		if (!available || !available.length) {
			throw new Error(`Cannot replace ${entry}: no valid values provided`);
		}

		// Clamp to last available value
		result.push(available[Math.min(position, available.length - 1)]);
	}
	return result;
}

View File

@ -1,275 +0,0 @@
import { getUnqualifiedEmojiSequence } from '../cleanup';
import { emojiComponents, EmojiComponentType } from '../data';
import { getEmojiSequenceKeyword } from '../format';
import { mapEmojiTestDataComponents } from './components';
import { EmojiComponentsMapItem, getEmojiComponentsMap } from './name';
import { EmojiTestDataItem, mapEmojiTestDataBySequence } from './parse';
type SequenceType = 'qualified' | 'unqualified';
interface SequenceData {
type: SequenceType;
sequence: number[];
key: string;
}
type Sequences = Record<SequenceType, SequenceData>;
type ComponentsIteration = Required<Record<EmojiComponentType, number[]>>;
/**
 * Create copy of components with one more value
 *
 * Both lists are copied, so `components` is not modified. New value is
 * appended to list for `attr`.
 */
function addToComponentsIteration(
	components: ComponentsIteration,
	attr: EmojiComponentType,
	value: number
): ComponentsIteration {
	const copy: ComponentsIteration = {
		'hair-style': [...components['hair-style']],
		'skin-tone': [...components['skin-tone']],
	};
	copy[attr] = copy[attr].concat([value]);
	return copy;
}
/**
 * Fill component placeholders in sequence with numbers
 *
 * For each component type, n-th placeholder is replaced with n-th number
 * from matching list in `components`. Numbers in sequence are kept as is.
 */
function addComponentsToSequence(
	sequence: (EmojiComponentType | number)[],
	components: ComponentsIteration
): number[] {
	// Next index to use, for each type
	const counters: Required<Record<EmojiComponentType, number>> = {
		'hair-style': 0,
		'skin-tone': 0,
	};

	const result: number[] = [];
	for (let i = 0; i < sequence.length; i++) {
		const entry = sequence[i];
		if (typeof entry === 'number') {
			result.push(entry);
		} else {
			const position = counters[entry]++;
			result.push(components[entry][position]);
		}
	}
	return result;
}
/**
 * Get qualified and unqualified versions of sequence
 *
 * If result of `getUnqualifiedEmojiSequence()` has same length as source
 * sequence, unqualified entry reuses sequence and key of qualified entry.
 */
function getSequence(sequence: number[]): Sequences {
	const qualified: SequenceData = {
		type: 'qualified',
		sequence,
		key: getEmojiSequenceKeyword(sequence),
	};

	const shortSequence = getUnqualifiedEmojiSequence(sequence);
	let unqualified: SequenceData;
	if (shortSequence.length === sequence.length) {
		// Nothing was removed: same data, different type
		unqualified = {
			type: 'unqualified',
			sequence: qualified.sequence,
			key: qualified.key,
		};
	} else {
		unqualified = {
			type: 'unqualified',
			sequence: shortSequence,
			key: getEmojiSequenceKeyword(shortSequence),
		};
	}

	return { qualified, unqualified };
}
/**
* Item to copy
*/
interface EmojiSequenceToCopy {
// Source: sequence and name
source: number[];
sourceName: string;
// Target: sequence and name
target: number[];
targetName: string;
}
/**
 * Get sequences to copy
 *
 * Walks components tree generated from test data and, for emojis from
 * `sequences` that have variations with components, finds variations that
 * are missing from `sequences`.
 *
 * Returns list of items, where each item contains sequence and name of
 * missing emoji (target) and sequence and name of emoji it should be
 * copied from (source).
 */
export function getEmojisSequencesToCopy(
sequences: number[][],
testData: EmojiTestDataItem[]
): EmojiSequenceToCopy[] {
const results: EmojiSequenceToCopy[] = [];
// Map components from test data, get tree of items with components
const componentsMap = mapEmojiTestDataComponents(
mapEmojiTestDataBySequence(testData, getEmojiSequenceKeyword),
getEmojiSequenceKeyword
);
const componentsMapItems = getEmojiComponentsMap(testData, componentsMap);
// Get all existing emojis, key is keyword generated from sequence
const existingItems = Object.create(null) as Record<string, number[]>;
// Emojis that were added to results, key is keyword of target sequence
const copiedItems = Object.create(null) as Record<string, number[]>;
sequences.forEach((sequence) => {
existingItems[getEmojiSequenceKeyword(sequence)] = sequence;
});
// Check if item exists: returns which version of sequence was found,
// qualified version is checked first
const itemExists = (sequence: Sequences): SequenceType | undefined => {
return existingItems[sequence.qualified.key]
? 'qualified'
: existingItems[sequence.unqualified.key]
? 'unqualified'
: void 0;
};
// Same as above, but for items that were already added to results
const itemWasCopied = (sequence: Sequences): SequenceType | undefined => {
return copiedItems[sequence.qualified.key]
? 'qualified'
: copiedItems[sequence.unqualified.key]
? 'unqualified'
: void 0;
};
// Copy item: mark target as copied and add entry to results
const addToCopy = (
source: SequenceData,
sourceName: string,
target: SequenceData,
targetName: string
) => {
copiedItems[target.key] = target.sequence;
results.push({
source: source.sequence,
sourceName,
target: target.sequence,
targetName,
});
};
// Get name: replaces `{type-index}` placeholders in item name with
// names of components
const getName = (
item: EmojiComponentsMapItem,
components: ComponentsIteration
) => {
let name = item.name;
for (const key in emojiComponents) {
const type = key as EmojiComponentType;
for (let i = 0; i < components[type].length; i++) {
const num = components[type][i];
const text = componentsMap.names.get(num) as string;
name = name.replace(`{${type}-${i}}`, text);
}
}
return name;
};
// Check item and its children
// If `onlyIfExists` is set, children of items that were copied earlier
// are not checked
const checkItem = (
parentItem: EmojiComponentsMapItem,
parentSequence: SequenceData,
parentComponents: ComponentsIteration,
onlyIfExists = true
) => {
const children = parentItem.children;
if (!children) {
return;
}
for (const key in emojiComponents) {
const type = key as EmojiComponentType;
if (children[type]) {
// Check emojis
const childItem = children[type];
const range = emojiComponents[type];
// Add each item in range, end of range is not included
for (let num = range[0]; num < range[1]; num++) {
// Components of parent item + current number
const components = addToComponentsIteration(
parentComponents,
type,
num
);
// Replace placeholders in sequence of child item
const sequence = addComponentsToSequence(
childItem.sequence,
components
);
const sequences = getSequence(sequence);
// Check if already exists
const existingSequence = itemExists(sequences);
if (existingSequence) {
// Already exists: nothing to copy, check its children
checkItem(
childItem,
sequences[existingSequence],
components,
onlyIfExists
);
continue;
}
// Check if was copied
let copiedSequence = itemWasCopied(sequences);
if (copiedSequence && onlyIfExists) {
// Cannot parse nested items yet
continue;
}
// Copy
if (!copiedSequence) {
// Copy sequence, using same type (qualified or unqualified)
// as parent sequence
copiedSequence = parentSequence.type;
addToCopy(
parentSequence,
getName(parentItem, parentComponents),
sequences[copiedSequence],
getName(childItem, components)
);
}
// Check child items
checkItem(
childItem,
sequences[copiedSequence],
components,
onlyIfExists
);
}
}
}
};
// Check all items
componentsMapItems.forEach((mainItem) => {
const sequence = getSequence(mainItem.sequence as number[]);
const type = itemExists(sequence);
if (!type) {
// Base emoji is missing: nothing to do
return;
}
// First pass: skip children of items that were copied
checkItem(
mainItem,
sequence[type],
{
'hair-style': [],
'skin-tone': [],
},
true
);
// Second pass: check children of copied items too
checkItem(
mainItem,
sequence[type],
{
'hair-style': [],
'skin-tone': [],
},
false
);
});
return results;
}

View File

@ -0,0 +1,154 @@
import { getUnqualifiedEmojiSequence } from '../cleanup';
import { emojiComponents, EmojiComponentType } from '../data';
import { getEmojiSequenceKeyword } from '../format';
import {
replaceEmojiComponentsInCombinedSequence,
EmojiSequenceComponentValues,
} from './components';
import type { EmojiComponentsTree, EmojiComponentsTreeItem } from './tree';
/**
* Base type to extend
*/
interface BaseSequenceItem {
sequence: number[];
// If present, will be set in value too
// String version of sequence without variation unicode
sequenceKey?: string;
}
/**
 * Find missing emojis
 *
 * Result includes missing items, which are extended from items that need to
 * be copied. To identify which emojis to copy, source object should include
 * something like `iconName` key that points to icon the sequence represents.
 *
 * Each missing item is a shallow copy of its parent item with `sequence`
 * replaced. If copied item has non-empty `sequenceKey`, it is replaced too.
 * Each missing sequence is added to result only once.
 */
export function findMissingEmojis<T extends BaseSequenceItem>(
sequences: T[],
testDataTree: EmojiComponentsTree
): T[] {
const results: T[] = [];
// Items from `sequences`, key is keyword of unqualified sequence
const existingItems = Object.create(null) as Record<string, T>;
// Items added to results, key is keyword of unqualified sequence
const copiedItems = Object.create(null) as Record<string, T>;
// Get all existing sequences
sequences.forEach((item) => {
const sequence = getUnqualifiedEmojiSequence(item.sequence);
const key = getEmojiSequenceKeyword(sequence);
if (
!existingItems[key] ||
// If multiple matches for same sequence exist, use longest version
existingItems[key].sequence.length < item.sequence.length
) {
existingItems[key] = item;
}
});
// Function to iterate sequences: checks child tree item for component
// `type`, using each number from range of that component
const iterate = (
type: EmojiComponentType,
parentTree: EmojiComponentsTreeItem,
parentValues: Required<EmojiSequenceComponentValues>,
parentItem: T,
deep: boolean
) => {
const childTree = parentTree.children?.[type];
if (!childTree) {
// No child item for this component type
return;
}
// Sequence exists
const range = emojiComponents[type];
// End of range is not included
for (let number = range[0]; number < range[1]; number++) {
// Create new values: copy of parent values + current number
const values: Required<EmojiSequenceComponentValues> = {
'hair-style': [...parentValues['hair-style']],
'skin-tone': [...parentValues['skin-tone']],
};
values[type].push(number);
// Generate sequence
const sequence = replaceEmojiComponentsInCombinedSequence(
childTree.item.sequence,
values
);
const key = getEmojiSequenceKeyword(
getUnqualifiedEmojiSequence(sequence)
);
// Get item
const oldItem = existingItems[key];
let item: T;
if (oldItem) {
// Exists
item = oldItem;
} else {
// Check if already created
item = copiedItems[key];
if (!item) {
// Create new item, based on parent item
item = {
...parentItem,
sequence,
};
// Update `sequenceKey` only if parent item has non-empty value
if (item.sequenceKey) {
item.sequenceKey = key;
}
copiedItems[key] = item;
results.push(item);
}
}
// Check child elements
// In shallow mode, only children of existing items are checked
if (deep || oldItem) {
for (const key in values) {
iterate(
key as EmojiComponentType,
childTree,
values,
item,
deep
);
}
}
}
};
// Function to check tree item
const parse = (key: string, deep: boolean) => {
const treeItem = testDataTree[key];
const sequenceKey = treeItem.item.sequenceKey;
// Check if item actually exists
const rootItem = existingItems[sequenceKey];
if (!rootItem) {
return;
}
// Parse tree, starting without any component values
const values: Required<EmojiSequenceComponentValues> = {
'skin-tone': [],
'hair-style': [],
};
for (const key in values) {
iterate(
key as EmojiComponentType,
treeItem,
values,
rootItem,
deep
);
}
};
// Shallow check first, then full check
for (const key in testDataTree) {
parse(key, false);
parse(key, true);
}
return results;
}

View File

@ -1,19 +1,14 @@
import { emojiComponents, EmojiComponentType, vs16Emoji } from '../data';
import { getEmojiSequenceKeyword } from '../format';
import {
import { emojiComponents, EmojiComponentType } from '../data';
import type {
EmojiSequenceComponentEntry,
EmojiTestDataComponentsMap,
mapEmojiTestDataComponents,
} from './components';
import { EmojiTestDataItem, mapEmojiTestDataBySequence } from './parse';
interface EmojiNameVariation {
// Index in sequence
index: number;
// Component type
type: EmojiComponentType;
}
/**
* Split emoji name in base name and variations
*
* Variations are also split in strings and emoji components with indexes pointing to sequence
*/
export interface SplitEmojiName {
// Base name
base: string;
@ -22,7 +17,7 @@ export interface SplitEmojiName {
key: string;
// Variations
variations?: (string | EmojiNameVariation)[];
variations?: (string | EmojiSequenceComponentEntry)[];
// Number of components
components?: number;
@ -55,293 +50,52 @@ export function splitEmojiNameVariations(
}
// Get variations
let startIndex = 0;
let components = 0;
const keyParts: string[] = [];
const variations = parts
const variations: (string | EmojiSequenceComponentEntry)[] = parts
.join(nameSplit)
.split(variationSplit)
.map((text) => {
.filter((text) => {
const type = componentsData.types[text];
if (!type) {
// Not a component
if (!ignoredVariations.has(text)) {
keyParts.push(text);
}
return text;
return !ignoredVariations.has(text);
}
// Component
const range = emojiComponents[type];
while (startIndex < sequence.length) {
const num = sequence[startIndex];
startIndex++;
if (num >= range[0] && num <= range[1]) {
// Got range match
components++;
return {
index: startIndex - 1,
type,
};
}
}
// Ran out of sequence
throw new Error(
`Cannot find variation in sequence for "${name}", [${sequence.join(
' '
)}]`
);
return false;
});
const key =
base +
(keyParts.length ? nameSplit + keyParts.join(variationSplit) : '');
return {
(variations.length ? nameSplit + variations.join(variationSplit) : '');
const result: SplitEmojiName = {
base,
key,
variations,
components,
};
}
/**
 * Convert list of component types to string
 *
 * Types are separated with comma and wrapped in square brackets
 */
function mergeComponentTypes(value: EmojiComponentType[]) {
	const joined = value.join(',');
	return `[${joined}]`;
}
type ComponentsCount = Required<Record<EmojiComponentType, number>>;
// Convert components count to string: each type is repeated as many times
// as its counter says, in order of keys in `emojiComponents`.
// Returns empty string if all counters are empty.
function mergeComponentsCount(value: ComponentsCount) {
	const types: EmojiComponentType[] = [];
	for (const key in emojiComponents) {
		const type = key as EmojiComponentType;
		for (let left = value[type]; left > 0; left--) {
			types.push(type);
		}
	}

	if (!types.length) {
		return '';
	}
	return mergeComponentTypes(types);
}
/**
* Map item
*/
type EmojiComponentsMapItemSequence = (EmojiComponentType | number)[];
export interface EmojiComponentsMapItem {
// Name, with `{skin-tone-1}` (type + index) placeholders
name: string;
// Sequence
sequence: EmojiComponentsMapItemSequence;
// Child element(s)
children?: Record<EmojiComponentType, EmojiComponentsMapItem>;
}
/**
* Get map of emoji components
*
* Result includes emoji sequences with largest number of characters (usually fully-qualified)
* Only sequences with components are returned
*/
export function getEmojiComponentsMap(
testData: EmojiTestDataItem[],
componentsMap?: EmojiTestDataComponentsMap
): EmojiComponentsMapItem[] {
// Prepare stuff
const components =
componentsMap ||
mapEmojiTestDataComponents(
mapEmojiTestDataBySequence(testData, getEmojiSequenceKeyword),
getEmojiSequenceKeyword
);
// Function to clean sequence
const cleanSequence = (sequence: number[]): string => {
return getEmojiSequenceKeyword(
sequence.filter(
(num) => num !== vs16Emoji && !components.converted.has(num)
)
);
};
// Map all items
interface SplitListItem {
item: EmojiTestDataItem;
split: SplitEmojiName;
components: ComponentsCount;
}
type SplitList = Record<string, SplitListItem>;
const splitData = Object.create(null) as Record<string, SplitList>;
const defaultSplitDataKey = 'default';
testData.forEach((item) => {
// Split it
const split = splitEmojiNameVariations(
item.name,
item.sequence,
components
);
const parent =
splitData[split.key] ||
(splitData[split.key] = Object.create(null) as SplitList);
// Create unique key based on component types
let sequenceKey = defaultSplitDataKey;
const itemComponents: ComponentsCount = {
'hair-style': 0,
'skin-tone': 0,
};
if (split.components) {
split.variations?.forEach((item) => {
if (typeof item !== 'string') {
itemComponents[item.type]++;
}
});
sequenceKey =
mergeComponentsCount(itemComponents) || defaultSplitDataKey;
}
// Get item if already exists
const prevItem = parent[sequenceKey];
if (!prevItem) {
parent[sequenceKey] = {
item,
split,
components: itemComponents,
};
return;
}
if (
cleanSequence(prevItem.item.sequence) !==
cleanSequence(item.sequence)
) {
// console.log(prevItem.item);
// console.log(item);
throw new Error(`Mismatched items with same key: ${sequenceKey}`);
}
if (item.sequence.length > prevItem.item.sequence.length) {
// Keep longer sequence
parent[sequenceKey] = {
item,
split,
components: itemComponents,
};
}
});
// Parse all items
const results: EmojiComponentsMapItem[] = [];
for (const key in splitData) {
const items = splitData[key];
// Function to get item
const getItem = (
components: ComponentsCount
): EmojiComponentsMapItem | undefined => {
const key = mergeComponentsCount(components) || defaultSplitDataKey;
const item = items[key];
if (!item) {
return;
}
const split = item.split;
const variations = split.variations;
// Get sequence
const sequence = item.item.sequence.slice(
0
) as EmojiComponentsMapItemSequence;
variations?.forEach((chunk) => {
if (typeof chunk === 'string') {
return;
}
sequence[chunk.index] = chunk.type;
});
// Get name
const counter: ComponentsCount = {
'hair-style': 0,
'skin-tone': 0,
};
const nameVariations = variations?.map((chunk) => {
if (typeof chunk === 'string') {
return chunk;
}
const count = counter[chunk.type]++;
if (components[chunk.type] < count) {
throw new Error('Bad variations order');
}
return `{${chunk.type}-${count}}`;
});
const name =
split.base +
(nameVariations?.length
? nameSplit + nameVariations.join(variationSplit)
: '');
return {
name,
sequence,
};
};
const checkChildren = (
parent: EmojiComponentsMapItem,
components: ComponentsCount
): boolean => {
// Attempt to add each type
let found = false;
for (const key in emojiComponents) {
const type = key as EmojiComponentType;
// Find child item
const childComponents = {
...components,
};
childComponents[type]++;
const childItem = getItem(childComponents);
// Get sequence for child item
if (childItem) {
found = true;
// Add child item, check its children
const children =
parent.children ||
(parent.children = {} as Record<
EmojiComponentType,
EmojiComponentsMapItem
>);
children[type] = childItem;
checkChildren(childItem, childComponents);
}
}
return found;
};
// Get main item
const mainItem = getItem({
'hair-style': 0,
'skin-tone': 0,
});
if (mainItem) {
if (
checkChildren(mainItem, {
'hair-style': 0,
'skin-tone': 0,
})
) {
// Found item with children
results.push(mainItem);
// Check sequence for variations
let components = 0;
for (let index = 0; index < sequence.length; index++) {
const num = sequence[index];
for (const key in emojiComponents) {
const type = key as EmojiComponentType;
const range = emojiComponents[type];
if (num >= range[0] && num < range[1]) {
// Within range
variations.push({
index,
type,
});
components++;
}
}
}
return results;
if (variations.length) {
result.variations = variations;
}
if (components) {
result.components = components;
}
return result;
}

View File

@ -21,21 +21,21 @@ const allowedStatus: Set<EmojiStatus> = new Set([
]);
/**
* Callback for converting sequence to string
* Base item
*/
export type EmojiSequenceToStringCallback = (value: number[]) => string;
/**
* Test data item
*/
export interface EmojiTestDataItem {
export interface BaseEmojiTestDataItem {
// Group and subgroup
group: string;
subgroup: string;
// Code points as string, lower case, dash separated
code: string;
// Version when emoji was added
version: string;
}
/**
* Test data item
*/
export interface EmojiTestDataItem extends BaseEmojiTestDataItem {
// Code points as numbers, UTF-32
sequence: number[];
@ -45,20 +45,62 @@ export interface EmojiTestDataItem {
// Status
status: EmojiStatus;
// Version when emoji was added
version: string;
// Emoji name
name: string;
}
export type EmojiTestData = Record<string, EmojiTestDataItem>;
/**
 * Get qualified variations from parsed test file
 *
 * Key is unqualified emoji, value is test data item with longest sequence
 * among all items that share the same unqualified sequence.
 *
 * If several items have sequences of same length, first one is kept.
 */
function getQualifiedTestData(data: EmojiTestData): EmojiTestData {
	const results = Object.create(null) as EmojiTestData;

	for (const key in data) {
		const item = data[key];
		const shortKey = getEmojiSequenceKeyword(
			getUnqualifiedEmojiSequence(item.sequence)
		);

		// Keep item with longest sequence.
		// Length of stored sequence must be compared to length of full
		// sequence of new item. Comparing it to length of unqualified sequence
		// (which is presumably never longer than source sequence) would never
		// replace stored item, so result would depend on order of entries.
		const previous = results[shortKey];
		if (!previous || previous.sequence.length < item.sequence.length) {
			results[shortKey] = item;
		}
	}

	return results;
}
/**
* Get all emoji sequences from test file
*
* Returns all emojis as UTF-32 sequences
* Returns all emojis as UTF-32 sequences, where:
* key = unqualified sequence (without \uFE0F)
* value = qualified sequence (with \uFE0F)
*
* Duplicate items that have different versions with and without \uFE0F are
* listed only once, with unqualified sequence as key and longest possible
* qualified sequence as value
*
* Example of 3 identical entries:
* '1F441 FE0F 200D 1F5E8 FE0F'
* '1F441 200D 1F5E8 FE0F'
* '1F441 FE0F 200D 1F5E8'
* '1F441 200D 1F5E8'
*
* Out of these entries, only one item will be returned with:
* key = '1f441-200d-1f5e8' (converted to lower case, separated with dash)
* value.sequence = [0x1F441, 0xFE0F, 0x200D, 0x1F5E8, 0xFE0F]
* value.status = 'fully-qualified'
* other properties in value are identical for all versions
*/
export function parseEmojiTestFile(data: string): EmojiTestDataItem[] {
const results: EmojiTestDataItem[] = [];
export function parseEmojiTestFile(data: string): EmojiTestData {
const results = Object.create(null) as EmojiTestData;
let group: string | undefined;
let subgroup: string | undefined;
@ -106,11 +148,8 @@ export function parseEmojiTestFile(data: string): EmojiTestDataItem[] {
return;
}
const code = firstChunkParts[0]
.trim()
.replace(/\s+/g, '-')
.toLowerCase();
if (!code || !code.match(/^[a-f0-9]+[a-f0-9-]*[a-f0-9]+$/)) {
const code = firstChunkParts[0].trim();
if (!code || !code.match(/^[A-F0-9]+[A-F0-9\s]*[A-F0-9]+$/)) {
return;
}
@ -133,87 +172,24 @@ export function parseEmojiTestFile(data: string): EmojiTestDataItem[] {
}
const name = secondChunkParts.join(' ');
// Get sequence and convert it to cleaned up string
const sequence = getEmojiSequenceFromString(code);
const key = getEmojiSequenceKeyword(sequence);
// Add item
results.push({
if (results[key]) {
throw new Error(`Duplicate entry for "${code}"`);
}
results[key] = {
group,
subgroup,
code,
sequence: getEmojiSequenceFromString(code),
sequence,
emoji,
status,
version,
name,
});
};
});
return results;
}
/**
 * Map unqualified sequences to qualified sequences
 *
 * Key is unqualified emoji, value is longest version of the same emoji,
 * where length is length of sequence converted to string.
 *
 * If `toString` callback is provided, it is used to convert sequences to
 * strings and result is an object with strings. Otherwise
 * `getEmojiSequenceKeyword()` is used and result is a Map with sequences.
 */
export function getQualifiedEmojiSequencesMap(
	sequences: number[][]
): Map<number[], number[]>;
export function getQualifiedEmojiSequencesMap(
	sequences: number[][],
	toString: (value: number[]) => string
): Record<string, string>;
export function getQualifiedEmojiSequencesMap(
	sequences: number[][],
	toString?: (value: number[]) => string
): Map<number[], number[]> | Record<string, string> {
	const convert = toString || getEmojiSequenceKeyword;
	const longest = Object.create(null) as Record<string, string>;

	for (const sequence of sequences) {
		const value = convert(sequence);
		const unqualified = convert(getUnqualifiedEmojiSequence(sequence));

		// Keep longest value for each unqualified sequence
		const previous = longest[unqualified];
		if (!previous || previous.length < value.length) {
			longest[unqualified] = value;
		}
	}

	if (toString) {
		// Custom callback: return strings
		return longest;
	}

	// Convert strings back to sequences
	const map: Map<number[], number[]> = new Map();
	for (const key in longest) {
		map.set(
			getEmojiSequenceFromString(key),
			getEmojiSequenceFromString(longest[key])
		);
	}
	return map;
}
/**
* Map data by sequence
*/
export function mapEmojiTestDataBySequence(
testData: EmojiTestDataItem[],
convert: EmojiSequenceToStringCallback
): Record<string, EmojiTestDataItem> {
const testSequences = Object.create(null) as Record<
string,
EmojiTestDataItem
>;
for (let i = 0; i < testData.length; i++) {
const item = testData[i];
const keyword = convert(item.sequence);
if (testSequences[keyword]) {
throw new Error(`Duplicate entries for "${keyword}"`);
}
testSequences[keyword] = item;
}
return testSequences;
return getQualifiedTestData(results);
}

View File

@ -0,0 +1,88 @@
import { vs16Emoji } from '../data';
import {
EmojiSequenceWithComponents,
emojiSequenceWithComponentsToString,
EmojiTestDataComponentsMap,
mapEmojiTestDataComponents,
} from './components';
import { SplitEmojiName, splitEmojiNameVariations } from './name';
import type {
BaseEmojiTestDataItem,
EmojiTestData,
EmojiTestDataItem,
} from './parse';
/**
* Similar test data items as one item
*/
export interface CombinedEmojiTestDataItem extends BaseEmojiTestDataItem {
// Name, split
name: SplitEmojiName;
// Sequence without variations, but with '{skin-tone}'
sequenceKey: string;
// Sequence with components
sequence: EmojiSequenceWithComponents;
}
export type SimilarEmojiTestData = Record<string, CombinedEmojiTestDataItem>;
/**
 * Convert test data item to CombinedEmojiTestDataItem
 *
 * Name is split using `splitEmojiNameVariations()`. In sequence, numbers at
 * indexes listed in name variations are replaced with component types.
 * `sequenceKey` is generated from that sequence, excluding `vs16Emoji`.
 */
export function findComponentsInEmojiTestItem(
	item: EmojiTestDataItem,
	componentsData: EmojiTestDataComponentsMap
): CombinedEmojiTestDataItem {
	const name = splitEmojiNameVariations(
		item.name,
		item.sequence,
		componentsData
	);

	// Copy sequence, replace components with their types
	const sequence = [...item.sequence] as EmojiSequenceWithComponents;
	if (name.variations) {
		for (const variation of name.variations) {
			if (typeof variation === 'string') {
				continue;
			}
			sequence[variation.index] = variation.type;
		}
	}

	// Key is based on sequence without variation selectors
	const withoutVariations = sequence.filter((code) => code !== vs16Emoji);
	const sequenceKey = emojiSequenceWithComponentsToString(withoutVariations);

	return {
		...item,
		name,
		sequenceKey,
		sequence,
	};
}
/**
 * Combine similar items into one iterable item
 *
 * Items with status 'component' are skipped. All other items are converted
 * with `findComponentsInEmojiTestItem()` and stored using `sequenceKey` as
 * key, so items with same `sequenceKey` overwrite each other.
 *
 * If `componentsData` is not provided, it is generated from `data`.
 */
export function combineSimilarEmojiTestData(
	data: EmojiTestData,
	componentsData?: EmojiTestDataComponentsMap
): SimilarEmojiTestData {
	const results = Object.create(null) as SimilarEmojiTestData;
	const components = componentsData || mapEmojiTestDataComponents(data);

	for (const key in data) {
		const sourceItem = data[key];
		if (sourceItem.status === 'component') {
			continue;
		}

		const combined = findComponentsInEmojiTestItem(sourceItem, components);
		results[combined.sequenceKey] = combined;
	}

	return results;
}

View File

@ -0,0 +1,199 @@
import { emojiComponents, EmojiComponentType } from '../data';
import type {
SimilarEmojiTestData,
CombinedEmojiTestDataItem,
} from './similar';
/**
* List of components
*/
type ComponentsCount = Required<Record<EmojiComponentType, number>>;
/**
* Extended tree item
*/
interface TreeSplitEmojiTestDataItem extends CombinedEmojiTestDataItem {
// Components
components: ComponentsCount;
// Components, stringified
componentsKey: string;
}
/**
* Tree item
*/
export interface EmojiComponentsTreeItem {
// Item
item: TreeSplitEmojiTestDataItem;
// Child element(s)
children?: Record<EmojiComponentType, EmojiComponentsTreeItem>;
}
export type EmojiComponentsTree = Record<string, EmojiComponentsTreeItem>;
/**
 * Convert list of component types to unique key
 *
 * Types are separated with comma and wrapped in square brackets
 */
function mergeComponentTypes(value: EmojiComponentType[]) {
	const joined = value.join(',');
	return `[${joined}]`;
}
/**
 * Convert components count to unique key
 *
 * Each type is repeated as many times as its counter says, in order of
 * keys in `emojiComponents`. Returns empty string if there are no components.
 */
function mergeComponentsCount(value: ComponentsCount): string {
	const types: EmojiComponentType[] = [];
	for (const key in emojiComponents) {
		const type = key as EmojiComponentType;
		for (let left = value[type]; left > 0; left--) {
			types.push(type);
		}
	}

	if (!types.length) {
		return '';
	}
	return mergeComponentTypes(types);
}
/**
* Group data
*/
interface GroupItem {
item: TreeSplitEmojiTestDataItem;
parsed?: true;
}
type GroupItems = Record<string, GroupItem>;
/**
 * Find item in group by components count
 *
 * If item is found, it is marked as parsed. Returns undefined if group
 * does not have item for that combination of components.
 */
function getGroupItem(
	items: GroupItems,
	components: ComponentsCount
): TreeSplitEmojiTestDataItem | undefined {
	const entry = items[mergeComponentsCount(components)];
	if (!entry) {
		return undefined;
	}

	entry.parsed = true;
	return entry.item;
}
/**
 * Convert test data to dependencies tree, based on components
 *
 * Items are grouped by `name.key`. In each group, item without components
 * is root of tree, items with components are its descendants: each level
 * of tree adds one component (or two skin tones, if there is no item with
 * one skin tone at that level).
 *
 * Result contains only groups where root item has child items, key is
 * `name.key` of group.
 *
 * Throws an error if group contains multiple items with same components,
 * if group does not have item without components, or if some item of group
 * cannot be reached from root item.
 */
export function getEmojiTestDataTree(
data: SimilarEmojiTestData
): EmojiComponentsTree {
// Group items by base name
const groups = Object.create(null) as Record<string, GroupItems>;
for (const key in data) {
const item = data[key];
const text = item.name.key;
const parent = groups[text] || (groups[text] = {} as GroupItems);
// Generate key: count placeholders of each component type in sequence
const components: ComponentsCount = {
'hair-style': 0,
'skin-tone': 0,
};
item.sequence.forEach((value) => {
if (typeof value !== 'number') {
components[value]++;
}
});
const componentsKey = mergeComponentsCount(components);
if (parent[componentsKey]) {
throw new Error(`Duplicate components tree item for "${text}"`);
}
parent[componentsKey] = {
item: {
...item,
components,
componentsKey,
},
};
}
// Sort items
const results = Object.create(null) as EmojiComponentsTree;
for (const key in groups) {
const items = groups[key];
// Attempt to add child item with one more component of `type` to parent
// Returns true if child item was found and added
const check = (
parent: EmojiComponentsTreeItem,
parentComponents: EmojiComponentType[],
type: EmojiComponentType
): true | undefined => {
const item = parse(parentComponents, [type]);
if (item) {
const children =
parent.children ||
(parent.children =
{} as Required<EmojiComponentsTreeItem>['children']);
children[type] = item;
return true;
}
};
// Find item that has components of parent item + new components,
// generate tree item for it, including its children
const parse = (
parentComponents: EmojiComponentType[],
newComponents: EmojiComponentType[]
): EmojiComponentsTreeItem | undefined => {
// Merge parameters
const components: ComponentsCount = {
'hair-style': 0,
'skin-tone': 0,
};
const componentsList = parentComponents.concat(newComponents);
componentsList.forEach((type) => {
components[type]++;
});
// Get item
let item = getGroupItem(items, components);
if (
!item &&
newComponents.length === 1 &&
newComponents[0] === 'skin-tone'
) {
// Attempt double skin tone
const doubleComponents = {
...components,
};
doubleComponents['skin-tone']++;
item = getGroupItem(items, doubleComponents);
}
if (item) {
// Check child items
const result: EmojiComponentsTreeItem = {
item,
};
// Try adding children
// NOTE(review): `componentsList` is passed as parent components even
// when double skin tone item was matched, so it contains only one
// 'skin-tone' for that item - confirm this is intended
for (const key in emojiComponents) {
check(result, componentsList, key as EmojiComponentType);
}
return result;
}
};
// Root item is item without components
const root = parse([], []);
if (!root) {
throw new Error(`Cannot find parent item for "${key}"`);
}
// Make sure all child items are checked
// `parsed` is set by getGroupItem() when item is retrieved
for (const itemsKey in items) {
if (!items[itemsKey].parsed) {
throw new Error(`Error generating tree for "${key}"`);
}
}
// Make sure root is not empty
if (root.children) {
results[key] = root;
}
}
return results;
}

View File

@ -1,13 +1,12 @@
import {
getEmojiSequenceFromString,
getUnqualifiedEmojiSequence,
joinEmojiSequences,
removeEmojiVariations,
splitEmojiSequences,
} from '../cleanup';
import { convertEmojiSequenceToUTF32 } from '../convert';
import { keycapEmoji, vs16Emoji } from '../data';
import { getEmojiSequenceKeyword } from '../format';
import { EmojiTestDataItem, getQualifiedEmojiSequencesMap } from './parse';
import type { EmojiTestData } from './parse';
/**
* Get qualified sequence, adding optional `FE0F` wherever it might exist
@ -36,6 +35,17 @@ export function guessQualifiedEmojiSequence(sequence: number[]): number[] {
return joinEmojiSequences(split);
}
/**
 * Base type to extend
 *
 * Minimum shape of item accepted by getQualifiedEmojiVariation() and
 * getQualifiedEmojiVariations(). Those functions return copy of item,
 * so any extra properties of item are preserved in result.
 */
interface BaseSequenceItem {
// Emoji sequence as list of numbers; it is converted to UTF-32 before processing
sequence: number[];
// String version of sequence without variation selectors.
// Optional: if item has non-empty value, it is replaced in result with keyword for unqualified sequence
sequenceKey?: string;
}
/**
* Get qualified variations for emojis
*
@ -45,48 +55,54 @@ export function guessQualifiedEmojiSequence(sequence: number[]): number[] {
* If missing or emoji is missing in test data, `FE0F` is added to every single code emoji.
* It can also be an array of sequences.
*/
export function getQualifiedEmojiVariations(
sequences: number[][],
testData?: (number[] | EmojiTestDataItem)[]
): number[][];
export function getQualifiedEmojiVariations(
sequences: number[][],
testData: (number[] | EmojiTestDataItem)[],
toString: (value: number[]) => string
): string[];
export function getQualifiedEmojiVariations(
sequences: number[][],
testData: (number[] | EmojiTestDataItem)[] = [],
toString?: (value: number[]) => string
): number[][] | string[] {
const convert = toString || getEmojiSequenceKeyword;
const testSequences = testData.map((item) =>
item instanceof Array ? item : item.sequence
export function getQualifiedEmojiVariation<T extends BaseSequenceItem>(
item: T,
testData?: EmojiTestData
): T {
// Convert to UTF-32, get unqualified sequence
const unqualifiedSequence = getUnqualifiedEmojiSequence(
convertEmojiSequenceToUTF32(item.sequence)
);
// Map test data
const testDataMap = getQualifiedEmojiSequencesMap(testSequences, convert);
// Check test data. Key is unqualified sequence
const key = getEmojiSequenceKeyword(unqualifiedSequence);
const testDataItem = testData?.[key];
// Parse all sequences
const set: Set<string> = new Set();
sequences.forEach((sequence) => {
// Convert to UTF-32, remove variations
const convertedSequence = convertEmojiSequenceToUTF32(sequence);
const cleanSequence = removeEmojiVariations(convertedSequence);
// Check test data
const mapKey = convert(cleanSequence);
if (testDataMap[mapKey]) {
// Got item from test data
set.add(testDataMap[mapKey]);
return;
}
// Not in test data: guess variations
set.add(convert(guessQualifiedEmojiSequence(cleanSequence)));
});
const results = Array.from(set);
return toString ? results : results.map(getEmojiSequenceFromString);
const result: T = {
...item,
sequence: testDataItem
? testDataItem.sequence
: guessQualifiedEmojiSequence(unqualifiedSequence),
};
if (result.sequenceKey) {
result.sequenceKey = key;
}
return result;
}
/**
 * Get qualified emoji variations for set of emojis, ignoring duplicate entries
 *
 * Each item is converted with getQualifiedEmojiVariation(). Items are treated
 * as duplicates when their unqualified sequences are identical; from
 * duplicates, the first item with the longest qualified sequence is used.
 *
 * Results are returned in the order in which unique sequences first appear
 * in `items`.
 *
 * `testData` is optional parsed test data, passed to getQualifiedEmojiVariation().
 */
export function getQualifiedEmojiVariations<T extends BaseSequenceItem>(
	items: T[],
	testData?: EmojiTestData
): T[] {
	// Map is used instead of plain object because Map always iterates in
	// insertion order. Plain object lists integer-like keys first, in
	// ascending numeric order, so keyword that contains only digits would
	// move its item to the start of results
	const results = new Map<string, T>();

	for (const item of items) {
		const result = getQualifiedEmojiVariation(item, testData);

		// Key is unqualified sequence, so all variations of emoji share it
		const key = getEmojiSequenceKeyword(
			getUnqualifiedEmojiSequence(result.sequence)
		);

		// Keep the longest sequence; replacing value does not change its position
		const existing = results.get(key);
		if (!existing || existing.sequence.length < result.sequence.length) {
			results.set(key, result);
		}
	}

	return Array.from(results.values());
}

View File

@ -87,7 +87,6 @@ export { loadIcon } from './loader/loader';
export {
getEmojiSequenceFromString,
getUnqualifiedEmojiSequence,
removeEmojiVariations,
} from './emoji/cleanup';
export {
getEmojiCodePoint,
@ -103,12 +102,9 @@ export {
getEmojiSequenceString,
getEmojiSequenceKeyword,
} from './emoji/format';
export {
parseEmojiTestFile,
getQualifiedEmojiSequencesMap,
} from './emoji/test/parse';
export { parseEmojiTestFile } from './emoji/test/parse';
export { getQualifiedEmojiVariations } from './emoji/test/variations';
export { getEmojisSequencesToCopy } from './emoji/test/copy';
// export { getEmojisSequencesToCopy } from './emoji/test/copy';
export {
createOptimisedRegex,
createOptimisedRegexForEmojiSequences,

View File

@ -2,7 +2,7 @@ import { convertEmojiSequenceToUTF32 } from '../lib/emoji/convert';
import {
getEmojiSequenceFromString,
joinEmojiSequences,
removeEmojiVariations,
getUnqualifiedEmojiSequence,
splitEmojiSequences,
} from '../lib/emoji/cleanup';
@ -37,7 +37,7 @@ describe('Testing formatting emoji cleanup', () => {
expect(joinEmojiSequences(split)).toEqual(sequence);
// Remove variations
expect(removeEmojiVariations(sequence)).toEqual([
expect(getUnqualifiedEmojiSequence(sequence)).toEqual([
0x1f441, 0x200d, 0x1f5e8,
]);
});
@ -63,6 +63,6 @@ describe('Testing formatting emoji cleanup', () => {
expect(joinEmojiSequences(split)).toEqual(sequence);
// Remove variations (does nothing for this sequence)
expect(removeEmojiVariations(sequence)).toEqual(sequence);
expect(getUnqualifiedEmojiSequence(sequence)).toEqual(sequence);
});
});

View File

@ -2,10 +2,7 @@ import { readFile, writeFile, unlink } from 'node:fs/promises';
import { emojiVersion } from '../lib/emoji/data';
import { getEmojiSequenceFromString } from '../lib/emoji/cleanup';
import { getEmojiSequenceString } from '../lib/emoji/format';
import {
getQualifiedEmojiSequencesMap,
parseEmojiTestFile,
} from '../lib/emoji/test/parse';
import { parseEmojiTestFile } from '../lib/emoji/test/parse';
import { getQualifiedEmojiVariations } from '../lib/emoji/test/variations';
describe('Qualified variations of emoji sequences', () => {
@ -61,11 +58,18 @@ describe('Qualified variations of emoji sequences', () => {
// mix of simple and complex, with and without variation
'1F9D7 1F3FE 200D 2640 FE0F',
'1F9D7 1F3FF 200D 2642 ',
].map(getEmojiSequenceFromString);
].map((source) => {
const sequence = getEmojiSequenceFromString(source);
return {
source,
sequence,
};
});
const results = getQualifiedEmojiVariations(sequences);
expect(
results.map((sequence) =>
getEmojiSequenceString(sequence, {
results.map((item) =>
getEmojiSequenceString(item.sequence, {
separator: ' ',
case: 'upper',
format: 'utf-32',
@ -98,23 +102,17 @@ describe('Qualified variations of emoji sequences', () => {
console.warn('Test skipped: test data is not available');
return;
}
const testData = parseEmojiTestFile(data);
const testDataSequences = testData.map((item) => item.sequence);
// Make sure testData contains both fully-qualified and unqualified emojis
const testDataStrings = new Set(testData.map((item) => item.code));
// Make sure testData keys contain only unqualified emojis
const testDataStrings = new Set(Object.keys(testData));
expect(testDataStrings.has('1f600')).toBe(true);
expect(testDataStrings.has('263a')).toBe(true);
expect(testDataStrings.has('263a-fe0f')).toBe(true);
expect(testDataStrings.has('263a-fe0f')).toBe(false);
// Test getQualifiedEmojiSequencesMap
const unqualifiedTest = getQualifiedEmojiSequencesMap(
testDataSequences,
getEmojiSequenceString
);
expect(unqualifiedTest['1f600']).toBe('1f600');
expect(unqualifiedTest['263a']).toBe('263a-fe0f');
// Make sure values contain qualified emojis
expect(testData['1f600'].sequence).toEqual([0x1f600]);
expect(testData['263a'].sequence).toEqual([0x263a, 0xfe0f]);
// Sequences to test
const sequences = [
@ -131,17 +129,20 @@ describe('Qualified variations of emoji sequences', () => {
// complex emoji, exists in file
'1F9D1 1F3FE 200D 2764 200D 1F9D1 1F3FB',
// simple emoji, not in test file
'1234',
'25F0',
// fake keycap, not in test file
'2345 20E3 200D 1235',
].map(getEmojiSequenceFromString);
const results = getQualifiedEmojiVariations(
sequences,
testDataSequences
);
].map((source) => {
const sequence = getEmojiSequenceFromString(source);
return {
source,
sequence,
};
});
const results = getQualifiedEmojiVariations(sequences, testData);
expect(
results.map((sequence) =>
getEmojiSequenceString(sequence, {
results.map((item) =>
getEmojiSequenceString(item.sequence, {
separator: ' ',
case: 'upper',
format: 'utf-32',
@ -162,7 +163,7 @@ describe('Qualified variations of emoji sequences', () => {
// complex emoji, exists in file
'1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FB',
// simple emoji, not in test file
'1234 FE0F',
'25F0 FE0F',
// fake keycap, not in test file
'2345 FE0F 20E3 200D 1235 FE0F',
]);

File diff suppressed because it is too large Load Diff