mirror of
https://github.com/iconify/iconify.git
synced 2024-12-13 14:13:06 +00:00
feat: functions to work with emoji unicode
This commit is contained in:
parent
dbfae68da5
commit
e186953d54
@ -102,6 +102,26 @@
|
|||||||
"import": "./lib/customisations/rotate.mjs",
|
"import": "./lib/customisations/rotate.mjs",
|
||||||
"types": "./lib/customisations/rotate.d.ts"
|
"types": "./lib/customisations/rotate.d.ts"
|
||||||
},
|
},
|
||||||
|
"./lib/emoji/cleanup": {
|
||||||
|
"require": "./lib/emoji/cleanup.cjs",
|
||||||
|
"import": "./lib/emoji/cleanup.mjs",
|
||||||
|
"types": "./lib/emoji/cleanup.d.ts"
|
||||||
|
},
|
||||||
|
"./lib/emoji/convert": {
|
||||||
|
"require": "./lib/emoji/convert.cjs",
|
||||||
|
"import": "./lib/emoji/convert.mjs",
|
||||||
|
"types": "./lib/emoji/convert.d.ts"
|
||||||
|
},
|
||||||
|
"./lib/emoji/data": {
|
||||||
|
"require": "./lib/emoji/data.cjs",
|
||||||
|
"import": "./lib/emoji/data.mjs",
|
||||||
|
"types": "./lib/emoji/data.d.ts"
|
||||||
|
},
|
||||||
|
"./lib/emoji/format": {
|
||||||
|
"require": "./lib/emoji/format.cjs",
|
||||||
|
"import": "./lib/emoji/format.mjs",
|
||||||
|
"types": "./lib/emoji/format.d.ts"
|
||||||
|
},
|
||||||
"./lib/icon-set/convert-info": {
|
"./lib/icon-set/convert-info": {
|
||||||
"require": "./lib/icon-set/convert-info.cjs",
|
"require": "./lib/icon-set/convert-info.cjs",
|
||||||
"import": "./lib/icon-set/convert-info.mjs",
|
"import": "./lib/icon-set/convert-info.mjs",
|
||||||
|
86
packages/utils/src/emoji/cleanup.ts
Normal file
86
packages/utils/src/emoji/cleanup.ts
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
import { convertEmojiSequenceToUTF32, getEmojiCodePoint } from './convert';
|
||||||
|
import { emojiTones, joinerEmoji, vs16Emoji } from './data';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get emoji sequence from string
|
||||||
|
*/
|
||||||
|
export function getEmojiSequenceFromString(value: string): number[] {
|
||||||
|
return convertEmojiSequenceToUTF32(
|
||||||
|
value.trim().split(/[\s-]/).map(getEmojiCodePoint)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Split sequence by joiner
|
||||||
|
*/
|
||||||
|
export function splitEmojiSequences(sequence: number[]): number[][] {
|
||||||
|
const results: number[][] = [];
|
||||||
|
let queue: number[] = [];
|
||||||
|
for (let i = 0; i < sequence.length; i++) {
|
||||||
|
const code = sequence[i];
|
||||||
|
if (code === joinerEmoji) {
|
||||||
|
results.push(queue);
|
||||||
|
queue = [];
|
||||||
|
} else {
|
||||||
|
queue.push(code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
results.push(queue);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Join emoji sequences
|
||||||
|
*/
|
||||||
|
export function joinEmojiSequences(sequences: number[][]): number[] {
|
||||||
|
let results: number[] = [];
|
||||||
|
for (let i = 0; i < sequences.length; i++) {
|
||||||
|
if (i > 0) {
|
||||||
|
results.push(joinerEmoji);
|
||||||
|
}
|
||||||
|
results = results.concat(sequences[i]);
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove variations
|
||||||
|
*/
|
||||||
|
export function removeEmojiVariations(sequence: number[]): number[] {
|
||||||
|
return sequence.filter((code) => code !== vs16Emoji);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove variations
|
||||||
|
*
|
||||||
|
* This function should be used with UTF-32 sequence, not UTF-16
|
||||||
|
*/
|
||||||
|
export function removeEmojiTones(sequence: number[]): number[] {
|
||||||
|
return sequence.filter((code) => {
|
||||||
|
for (let i = 0; i < emojiTones.length; i++) {
|
||||||
|
const range = emojiTones[i];
|
||||||
|
if (code >= range[0] && code < range[1]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
type MapCallback = (sequence: number[]) => number[];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run function on sequences
|
||||||
|
*
|
||||||
|
* Intended to be used with functions such as `removeEmojiVariations` or `removeEmojiTones`
|
||||||
|
*/
|
||||||
|
export function mapEmojiSequences(
|
||||||
|
sequences: number[][],
|
||||||
|
callback: MapCallback,
|
||||||
|
removeEmpty = true
|
||||||
|
): number[][] {
|
||||||
|
const results = sequences.map((sequence) => callback(sequence));
|
||||||
|
return removeEmpty
|
||||||
|
? results.filter((sequence) => sequence.length > 0)
|
||||||
|
: results;
|
||||||
|
}
|
155
packages/utils/src/emoji/convert.ts
Normal file
155
packages/utils/src/emoji/convert.ts
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
import {
|
||||||
|
endUTF32Pair,
|
||||||
|
minUTF32,
|
||||||
|
startUTF32Pair1,
|
||||||
|
startUTF32Pair2,
|
||||||
|
} from './data';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert string to number
|
||||||
|
*/
|
||||||
|
export function getEmojiCodePoint(code: string): number {
|
||||||
|
return parseInt(code, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* First part of UTF-32 to UTF-16
|
||||||
|
*/
|
||||||
|
function utf32FirstNum(code: number): number {
|
||||||
|
return (((code - minUTF32) >> 0x0a) | 0x0) + startUTF32Pair1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* First part of UTF-32 to UTF-16
|
||||||
|
*/
|
||||||
|
function utf32SecondNum(code: number): number {
|
||||||
|
return ((code - minUTF32) & 0x3ff) + startUTF32Pair2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get UTF-32 as UTF-16 sequence
|
||||||
|
*/
|
||||||
|
export function splitUTF32Number(code: number): [number, number] | undefined {
|
||||||
|
if (code >= minUTF32) {
|
||||||
|
return [utf32FirstNum(code), utf32SecondNum(code)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if number is UTF-32 split as UTF-16
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* - 1 if number fits first number in sequence
|
||||||
|
* - 2 if number fits second number in sequence
|
||||||
|
* - false on failure
|
||||||
|
*/
|
||||||
|
export function isUTF32SplitNumber(value: number): 1 | 2 | false {
|
||||||
|
if (value >= startUTF32Pair1) {
|
||||||
|
if (value < startUTF32Pair2) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (value < endUTF32Pair) {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get UTF-16 sequence as UTF-32
|
||||||
|
*/
|
||||||
|
export function mergeUTF32Numbers(
|
||||||
|
part1: number,
|
||||||
|
part2: number
|
||||||
|
): number | undefined {
|
||||||
|
// Check ranges
|
||||||
|
if (
|
||||||
|
part1 < startUTF32Pair1 ||
|
||||||
|
part1 >= startUTF32Pair2 ||
|
||||||
|
part2 < startUTF32Pair2 ||
|
||||||
|
part2 >= endUTF32Pair
|
||||||
|
) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge values
|
||||||
|
return (
|
||||||
|
((part1 - startUTF32Pair1) << 0x0a) +
|
||||||
|
(part2 - startUTF32Pair2) +
|
||||||
|
minUTF32
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert hexadecimal string or number to unicode
|
||||||
|
*/
|
||||||
|
export function getEmojiUnicode(code: number | string): string {
|
||||||
|
return String.fromCodePoint(
|
||||||
|
typeof code === 'number' ? code : getEmojiCodePoint(code)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert sequence to UTF-16
|
||||||
|
*/
|
||||||
|
export function convertEmojiSequenceToUTF16(numbers: number[]): number[] {
|
||||||
|
const results: number[] = [];
|
||||||
|
for (let i = 0; i < numbers.length; i++) {
|
||||||
|
const code = numbers[i];
|
||||||
|
if (code >= minUTF32) {
|
||||||
|
results.push(utf32FirstNum(code));
|
||||||
|
results.push(utf32SecondNum(code));
|
||||||
|
} else {
|
||||||
|
results.push(code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert sequence to UTF-32
|
||||||
|
*/
|
||||||
|
export function convertEmojiSequenceToUTF32(
|
||||||
|
numbers: number[],
|
||||||
|
throwOnError = true
|
||||||
|
): number[] {
|
||||||
|
const results: number[] = [];
|
||||||
|
for (let i = 0; i < numbers.length; i++) {
|
||||||
|
const code = numbers[i];
|
||||||
|
if (code >= minUTF32) {
|
||||||
|
// Already UTF-32
|
||||||
|
results.push(code);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const part = isUTF32SplitNumber(code);
|
||||||
|
if (!part) {
|
||||||
|
// Nothing to convert
|
||||||
|
results.push(code);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// UTF-32 code as 2 part sequence
|
||||||
|
if (part === 1 && numbers.length > i + 1) {
|
||||||
|
const merged = mergeUTF32Numbers(code, numbers[i + 1]);
|
||||||
|
if (merged) {
|
||||||
|
// Success
|
||||||
|
i++;
|
||||||
|
results.push(merged);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Failed to merge UTF-32 sequence
|
||||||
|
if (throwOnError) {
|
||||||
|
const nextCode = numbers[i + 1];
|
||||||
|
throw new Error(
|
||||||
|
`Invalid UTF-16 sequence: ${code.toString(16)}-${
|
||||||
|
nextCode ? nextCode.toString(16) : 'undefined'
|
||||||
|
}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
results.push(code);
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
40
packages/utils/src/emoji/data.ts
Normal file
40
packages/utils/src/emoji/data.ts
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
/**
|
||||||
|
* Various codes
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Joiner in emoji sequences
|
||||||
|
export const joinerEmoji = 0x200d;
|
||||||
|
|
||||||
|
// Emoji as icon
|
||||||
|
export const vs16Emoji = 0xfe0f;
|
||||||
|
|
||||||
|
// Keycap, preceeded by mandatory VS16 for full emoji
|
||||||
|
export const keycapEmoji = 0x20e3;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Variations, UTF-32
|
||||||
|
*
|
||||||
|
* First value in array is minimum, second value is maximum+1
|
||||||
|
*/
|
||||||
|
type Range = [number, number];
|
||||||
|
export const emojiTones: Range[] = [
|
||||||
|
// Skin tones
|
||||||
|
[0x1f3fb, 0x1f400],
|
||||||
|
// Hair tones
|
||||||
|
[0x1f9b0, 0x1f9b4],
|
||||||
|
];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimum UTF-32 number
|
||||||
|
*/
|
||||||
|
export const minUTF32 = 0x10000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Codes for UTF-32 characters presented as UTF-16
|
||||||
|
*
|
||||||
|
* startUTF32Pair1 <= code < startUTF32Pair2 -> code for first character in pair
|
||||||
|
* startUTF32Pair2 <= code < endUTF32Pair -> code for second character in pair
|
||||||
|
*/
|
||||||
|
export const startUTF32Pair1 = 0xd800;
|
||||||
|
export const startUTF32Pair2 = 0xdc00;
|
||||||
|
export const endUTF32Pair = 0xe000;
|
131
packages/utils/src/emoji/format.ts
Normal file
131
packages/utils/src/emoji/format.ts
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import {
|
||||||
|
convertEmojiSequenceToUTF16,
|
||||||
|
convertEmojiSequenceToUTF32,
|
||||||
|
} from './convert';
|
||||||
|
|
||||||
|
interface UnicodeOptions {
|
||||||
|
// Prefix before each character '\\u'
|
||||||
|
prefix: string;
|
||||||
|
|
||||||
|
// Separator between characters
|
||||||
|
separator: string;
|
||||||
|
|
||||||
|
// Case conversion
|
||||||
|
case: 'upper' | 'lower';
|
||||||
|
|
||||||
|
// UTF conversion
|
||||||
|
format: 'utf-32' | 'utf-16';
|
||||||
|
|
||||||
|
// Add '0' for code shorter than 4 letters
|
||||||
|
add0: boolean;
|
||||||
|
|
||||||
|
// Throw on error
|
||||||
|
throwOnError: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
const defaultUnicodeOptions: UnicodeOptions = {
|
||||||
|
prefix: '',
|
||||||
|
separator: '',
|
||||||
|
case: 'lower',
|
||||||
|
format: 'utf-32',
|
||||||
|
add0: false,
|
||||||
|
throwOnError: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert number to string
|
||||||
|
*/
|
||||||
|
function convert(sequence: number[], options: UnicodeOptions): string {
|
||||||
|
const prefix = options.prefix;
|
||||||
|
const func = options.case === 'upper' ? 'toUpperCase' : 'toLowerCase';
|
||||||
|
|
||||||
|
const cleanSequence =
|
||||||
|
options.format === 'utf-16'
|
||||||
|
? convertEmojiSequenceToUTF16(sequence)
|
||||||
|
: convertEmojiSequenceToUTF32(sequence, options.throwOnError);
|
||||||
|
|
||||||
|
return cleanSequence
|
||||||
|
.map((code) => {
|
||||||
|
let str = code.toString(16);
|
||||||
|
if (options.add0 && str.length < 4) {
|
||||||
|
str = '0'.repeat(4 - str.length) + str;
|
||||||
|
}
|
||||||
|
return prefix + str[func]();
|
||||||
|
})
|
||||||
|
.join(options.separator);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert unicode number to string
|
||||||
|
*/
|
||||||
|
export function getEmojiUnicodeString(
|
||||||
|
code: number,
|
||||||
|
options: Partial<UnicodeOptions> = {}
|
||||||
|
): string {
|
||||||
|
return convert([code], {
|
||||||
|
...defaultUnicodeOptions,
|
||||||
|
...options,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const defaultSequenceOptions: UnicodeOptions = {
|
||||||
|
...defaultUnicodeOptions,
|
||||||
|
separator: '-',
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert unicode numbers sequence to string
|
||||||
|
*/
|
||||||
|
export function getEmojiSequenceString(
|
||||||
|
sequence: number[],
|
||||||
|
options: Partial<UnicodeOptions> = {}
|
||||||
|
): string {
|
||||||
|
return convert(sequence, {
|
||||||
|
...defaultSequenceOptions,
|
||||||
|
...options,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const regexOptions: UnicodeOptions = {
|
||||||
|
prefix: '\\u',
|
||||||
|
separator: '',
|
||||||
|
case: 'upper',
|
||||||
|
format: 'utf-16',
|
||||||
|
add0: false,
|
||||||
|
throwOnError: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge unicode numbers sequence as regex
|
||||||
|
*/
|
||||||
|
export function emojiSequenceToRegex(
|
||||||
|
sequence: number[],
|
||||||
|
throwOnError = true
|
||||||
|
): string {
|
||||||
|
return convert(sequence, {
|
||||||
|
...regexOptions,
|
||||||
|
throwOnError,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const keywordOptions: UnicodeOptions = {
|
||||||
|
prefix: '',
|
||||||
|
separator: '-',
|
||||||
|
case: 'lower',
|
||||||
|
format: 'utf-32',
|
||||||
|
add0: true,
|
||||||
|
throwOnError: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge unicode numbers sequence as icon keyword
|
||||||
|
*/
|
||||||
|
export function emojiSequenceToKeyword(
|
||||||
|
sequence: number[],
|
||||||
|
throwOnError = true
|
||||||
|
): string {
|
||||||
|
return convert(sequence, {
|
||||||
|
...keywordOptions,
|
||||||
|
throwOnError,
|
||||||
|
});
|
||||||
|
}
|
87
packages/utils/tests/emoji-cleanup-test.ts
Normal file
87
packages/utils/tests/emoji-cleanup-test.ts
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||||
|
import {
|
||||||
|
getEmojiSequenceFromString,
|
||||||
|
joinEmojiSequences,
|
||||||
|
mapEmojiSequences,
|
||||||
|
removeEmojiTones,
|
||||||
|
removeEmojiVariations,
|
||||||
|
splitEmojiSequences,
|
||||||
|
} from '../lib/emoji/cleanup';
|
||||||
|
|
||||||
|
describe('Testing formatting emoji cleanup', () => {
|
||||||
|
it('UTF-32 sequence', () => {
|
||||||
|
// Convert from string
|
||||||
|
const sequence = getEmojiSequenceFromString(
|
||||||
|
'1F441 FE0F 200D 1F5E8 FE0F '
|
||||||
|
);
|
||||||
|
expect(sequence).toEqual([0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
|
||||||
|
|
||||||
|
// Split
|
||||||
|
const split = splitEmojiSequences(sequence);
|
||||||
|
expect(split).toEqual([
|
||||||
|
[0x1f441, 0xfe0f],
|
||||||
|
[0x1f5e8, 0xfe0f],
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Join again
|
||||||
|
expect(joinEmojiSequences(split)).toEqual(sequence);
|
||||||
|
|
||||||
|
// Remove variations
|
||||||
|
expect(removeEmojiVariations(sequence)).toEqual([
|
||||||
|
0x1f441, 0x200d, 0x1f5e8,
|
||||||
|
]);
|
||||||
|
expect(mapEmojiSequences(split, removeEmojiVariations)).toEqual([
|
||||||
|
[0x1f441],
|
||||||
|
[0x1f5e8],
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Remove tones (does nothing for this sequence)
|
||||||
|
expect(removeEmojiTones(sequence)).toEqual(sequence);
|
||||||
|
expect(mapEmojiSequences(split, removeEmojiTones)).toEqual(split);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('UTF-32 sequence with tones', () => {
|
||||||
|
// Convert from string
|
||||||
|
const sequence = getEmojiSequenceFromString(
|
||||||
|
'1f9d1-1f3ff-200d-1f91d-200d-1f9d1-1f3ff'
|
||||||
|
);
|
||||||
|
expect(sequence).toEqual([
|
||||||
|
0x1f9d1, 0x1f3ff, 0x200d, 0x1f91d, 0x200d, 0x1f9d1, 0x1f3ff,
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Split
|
||||||
|
const split = splitEmojiSequences(sequence);
|
||||||
|
expect(split).toEqual([
|
||||||
|
[0x1f9d1, 0x1f3ff],
|
||||||
|
[0x1f91d],
|
||||||
|
[0x1f9d1, 0x1f3ff],
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Join again
|
||||||
|
expect(joinEmojiSequences(split)).toEqual(sequence);
|
||||||
|
|
||||||
|
// Remove variations (does nothing for this sequence)
|
||||||
|
expect(removeEmojiVariations(sequence)).toEqual(sequence);
|
||||||
|
expect(mapEmojiSequences(split, removeEmojiVariations)).toEqual(split);
|
||||||
|
|
||||||
|
// Remove tones
|
||||||
|
expect(removeEmojiTones(sequence)).toEqual([
|
||||||
|
0x1f9d1, 0x200d, 0x1f91d, 0x200d, 0x1f9d1,
|
||||||
|
]);
|
||||||
|
expect(mapEmojiSequences(split, removeEmojiTones)).toEqual([
|
||||||
|
[0x1f9d1],
|
||||||
|
[0x1f91d],
|
||||||
|
[0x1f9d1],
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Hair tones (bad emoji, second chunk only has tone without emoji)
|
||||||
|
const sequence2 = getEmojiSequenceFromString('1F471 1F3FC-200D 1F3FF');
|
||||||
|
expect(sequence2).toEqual([0x1f471, 0x1f3fc, 0x200d, 0x1f3ff]);
|
||||||
|
const split2 = splitEmojiSequences(sequence2);
|
||||||
|
|
||||||
|
expect(removeEmojiTones(sequence2)).toEqual([0x1f471, 0x200d]);
|
||||||
|
expect(mapEmojiSequences(split2, removeEmojiTones)).toEqual([
|
||||||
|
[0x1f471],
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
135
packages/utils/tests/emoji-convert-test.ts
Normal file
135
packages/utils/tests/emoji-convert-test.ts
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||||
|
import {
|
||||||
|
getEmojiCodePoint,
|
||||||
|
getEmojiUnicode,
|
||||||
|
splitUTF32Number,
|
||||||
|
isUTF32SplitNumber,
|
||||||
|
mergeUTF32Numbers,
|
||||||
|
convertEmojiSequenceToUTF16,
|
||||||
|
convertEmojiSequenceToUTF32,
|
||||||
|
} from '../lib/emoji/convert';
|
||||||
|
|
||||||
|
describe('Testing emoji code points', () => {
|
||||||
|
it('UTF-16', () => {
|
||||||
|
// Convert to number
|
||||||
|
const codePoint = getEmojiCodePoint('2764');
|
||||||
|
expect(codePoint).toBe(parseInt('2764', 16));
|
||||||
|
|
||||||
|
// Check for UTF-32
|
||||||
|
expect(splitUTF32Number(codePoint)).toBeUndefined();
|
||||||
|
expect(isUTF32SplitNumber(codePoint)).toBe(false);
|
||||||
|
|
||||||
|
// Convert to character
|
||||||
|
expect(getEmojiUnicode(codePoint)).toBe('\u2764');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('UTF-32', () => {
|
||||||
|
// Convert to number
|
||||||
|
const codePoint = getEmojiCodePoint('1F49A');
|
||||||
|
expect(codePoint).toBe(parseInt('1F49A', 16));
|
||||||
|
expect(isUTF32SplitNumber(codePoint)).toBe(false);
|
||||||
|
|
||||||
|
// Convert to UTF-16 sequence
|
||||||
|
const sequence = splitUTF32Number(codePoint);
|
||||||
|
expect(sequence).toEqual([55357, 56474]);
|
||||||
|
expect(isUTF32SplitNumber(sequence![0])).toBe(1);
|
||||||
|
expect(isUTF32SplitNumber(sequence![1])).toBe(2);
|
||||||
|
|
||||||
|
// Convert back to UTF-32
|
||||||
|
expect(mergeUTF32Numbers(...sequence!)).toBe(codePoint);
|
||||||
|
|
||||||
|
// Convert to string
|
||||||
|
expect(getEmojiUnicode(codePoint)).toBe('\uD83D\uDC9A');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('Sequences to UTF-16', () => {
|
||||||
|
// Nothing to convert
|
||||||
|
expect(convertEmojiSequenceToUTF16([])).toEqual([]);
|
||||||
|
expect(convertEmojiSequenceToUTF16([0x263a, 0xfe0f])).toEqual([
|
||||||
|
0x263a, 0xfe0f,
|
||||||
|
]);
|
||||||
|
|
||||||
|
// UTF-32
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF16([
|
||||||
|
0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f,
|
||||||
|
])
|
||||||
|
).toEqual([0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('Sequences to UTF-32', () => {
|
||||||
|
let thrown: boolean;
|
||||||
|
|
||||||
|
// Nothing to convert
|
||||||
|
expect(convertEmojiSequenceToUTF32([])).toEqual([]);
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32([
|
||||||
|
0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f,
|
||||||
|
])
|
||||||
|
).toEqual([0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
|
||||||
|
|
||||||
|
// UTF-16
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32([
|
||||||
|
0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
|
||||||
|
])
|
||||||
|
).toEqual([0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
|
||||||
|
|
||||||
|
// Bad UTF-16: first character is wrong
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32(
|
||||||
|
[0xa83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f],
|
||||||
|
false
|
||||||
|
)
|
||||||
|
).toEqual([0xa83d, 0xdc41, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
|
||||||
|
thrown = false;
|
||||||
|
try {
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32([
|
||||||
|
0xa83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
|
||||||
|
])
|
||||||
|
);
|
||||||
|
} catch {
|
||||||
|
thrown = true;
|
||||||
|
}
|
||||||
|
expect(thrown).toBe(true);
|
||||||
|
|
||||||
|
// Bad UTF-16: second character is wrong
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32(
|
||||||
|
[0xd83d, 0xec41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f],
|
||||||
|
false
|
||||||
|
)
|
||||||
|
).toEqual([0xd83d, 0xec41, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
|
||||||
|
thrown = false;
|
||||||
|
try {
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32([
|
||||||
|
0xd83d, 0xec41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
|
||||||
|
])
|
||||||
|
);
|
||||||
|
} catch {
|
||||||
|
thrown = true;
|
||||||
|
}
|
||||||
|
expect(thrown).toBe(true);
|
||||||
|
|
||||||
|
// Bad UTF-16: unexpected end
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32(
|
||||||
|
[0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d],
|
||||||
|
false
|
||||||
|
)
|
||||||
|
).toEqual([0x1f441, 0xfe0f, 0x200d, 0xd83d]);
|
||||||
|
thrown = false;
|
||||||
|
try {
|
||||||
|
expect(
|
||||||
|
convertEmojiSequenceToUTF32([
|
||||||
|
0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d,
|
||||||
|
])
|
||||||
|
);
|
||||||
|
} catch {
|
||||||
|
thrown = true;
|
||||||
|
}
|
||||||
|
expect(thrown).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
55
packages/utils/tests/emoji-format-test.ts
Normal file
55
packages/utils/tests/emoji-format-test.ts
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||||
|
import {
|
||||||
|
getEmojiSequenceString,
|
||||||
|
emojiSequenceToRegex,
|
||||||
|
emojiSequenceToKeyword,
|
||||||
|
} from '../lib/emoji/format';
|
||||||
|
|
||||||
|
describe('Testing formatting emoji code points', () => {
|
||||||
|
it('Empty sequence', () => {
|
||||||
|
const sequence: number[] = [];
|
||||||
|
expect(getEmojiSequenceString(sequence)).toBe('');
|
||||||
|
expect(emojiSequenceToRegex(sequence)).toBe('');
|
||||||
|
expect(emojiSequenceToKeyword(sequence)).toBe('');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('Keycap sequence', () => {
|
||||||
|
const sequence: number[] = [0x23, 0xfe0f, 0x20e3];
|
||||||
|
|
||||||
|
expect(getEmojiSequenceString(sequence)).toBe('23-fe0f-20e3');
|
||||||
|
expect(emojiSequenceToRegex(sequence)).toBe('\\u23\\uFE0F\\u20E3');
|
||||||
|
expect(emojiSequenceToKeyword(sequence)).toBe('0023-fe0f-20e3');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('UTF-16 sequence', () => {
|
||||||
|
const sequence: number[] = [
|
||||||
|
0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
|
||||||
|
];
|
||||||
|
|
||||||
|
// UTF-32 = 0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f
|
||||||
|
expect(getEmojiSequenceString(sequence)).toBe(
|
||||||
|
'1f441-fe0f-200d-1f5e8-fe0f'
|
||||||
|
);
|
||||||
|
expect(emojiSequenceToRegex(sequence)).toBe(
|
||||||
|
'\\uD83D\\uDC41\\uFE0F\\u200D\\uD83D\\uDDE8\\uFE0F'
|
||||||
|
);
|
||||||
|
expect(emojiSequenceToKeyword(sequence)).toBe(
|
||||||
|
'1f441-fe0f-200d-1f5e8-fe0f'
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('UTF-32 sequence', () => {
|
||||||
|
const sequence: number[] = [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f];
|
||||||
|
|
||||||
|
// UTF-16 = 0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
|
||||||
|
expect(getEmojiSequenceString(sequence)).toBe(
|
||||||
|
'1f441-fe0f-200d-1f5e8-fe0f'
|
||||||
|
);
|
||||||
|
expect(emojiSequenceToRegex(sequence)).toBe(
|
||||||
|
'\\uD83D\\uDC41\\uFE0F\\u200D\\uD83D\\uDDE8\\uFE0F'
|
||||||
|
);
|
||||||
|
expect(emojiSequenceToKeyword(sequence)).toBe(
|
||||||
|
'1f441-fe0f-200d-1f5e8-fe0f'
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
5024
packages/utils/tests/fixtures/download-emoji-15.0.txt
vendored
Normal file
5024
packages/utils/tests/fixtures/download-emoji-15.0.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
176
packages/utils/tests/validate-emoji-unicode-test.ts
Normal file
176
packages/utils/tests/validate-emoji-unicode-test.ts
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||||
|
import { readFile, writeFile, unlink } from 'node:fs/promises';
|
||||||
|
import { getEmojiCodePoint, splitUTF32Number } from '../lib/emoji/convert';
|
||||||
|
import {
|
||||||
|
startUTF32Pair1,
|
||||||
|
startUTF32Pair2,
|
||||||
|
endUTF32Pair,
|
||||||
|
} from '../lib/emoji/data';
|
||||||
|
|
||||||
|
// Emoji types
|
||||||
|
type EmojiType =
|
||||||
|
| 'component'
|
||||||
|
| 'fully-qualified'
|
||||||
|
| 'minimally-qualified'
|
||||||
|
| 'unqualified';
|
||||||
|
const componentType: EmojiType = 'component';
|
||||||
|
|
||||||
|
// Allowed types, in order of conversion
|
||||||
|
const allowedTypes: Set<EmojiType> = new Set([
|
||||||
|
componentType,
|
||||||
|
'fully-qualified',
|
||||||
|
'minimally-qualified',
|
||||||
|
'unqualified',
|
||||||
|
]);
|
||||||
|
|
||||||
|
describe('Testing emoji code points', () => {
|
||||||
|
it('Checking available ranges', async () => {
|
||||||
|
// Fetch emojis, cache it
|
||||||
|
const version = '15.0';
|
||||||
|
const source = `tests/fixtures/download-emoji-${version}.txt`;
|
||||||
|
|
||||||
|
let data: string | undefined;
|
||||||
|
try {
|
||||||
|
data = await readFile(source, 'utf8');
|
||||||
|
} catch {
|
||||||
|
//
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!data) {
|
||||||
|
data = (
|
||||||
|
await (
|
||||||
|
await fetch(
|
||||||
|
`https://unicode.org/Public/emoji/${version}/emoji-test.txt`
|
||||||
|
)
|
||||||
|
).text()
|
||||||
|
).toString();
|
||||||
|
await writeFile(source, data, 'utf8');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test content, unlink cache on failure
|
||||||
|
if (data.indexOf(`# Version: ${version}`) === -1) {
|
||||||
|
try {
|
||||||
|
await unlink(source);
|
||||||
|
} catch {
|
||||||
|
//
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all emojis
|
||||||
|
const utf16: Set<string> = new Set();
|
||||||
|
const utf32: Set<string> = new Set();
|
||||||
|
data.split('\n').forEach((line) => {
|
||||||
|
line = line.trim();
|
||||||
|
const parts = line.split('#');
|
||||||
|
if (parts.length < 2) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get code and type from first chunk
|
||||||
|
const firstChunk = (parts.shift() as string).trim();
|
||||||
|
if (!firstChunk) {
|
||||||
|
// Empty first chunk: a comment
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const firstChunkParts = firstChunk.split(';');
|
||||||
|
if (firstChunkParts.length !== 2) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const text = firstChunkParts[0].trim();
|
||||||
|
const code = text.toLowerCase().replace(/\s+/g, '-');
|
||||||
|
if (!code) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const type = firstChunkParts[1].trim() as EmojiType;
|
||||||
|
if (!allowedTypes.has(type)) {
|
||||||
|
throw new Error(`Bad emoji type: ${type}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add code
|
||||||
|
code.split('-').forEach((chunk) => {
|
||||||
|
switch (chunk.length) {
|
||||||
|
case 2:
|
||||||
|
case 4:
|
||||||
|
utf16.add(chunk);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 5:
|
||||||
|
utf32.add(chunk);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw new Error(`Bad emoji code: ${text}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Code points should not be empty
|
||||||
|
expect(utf16.size).toBeGreaterThan(0);
|
||||||
|
expect(utf32.size).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Get min/max values
|
||||||
|
interface Range {
|
||||||
|
min: number;
|
||||||
|
max: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
function add(code: number, range: Range | undefined): Range {
|
||||||
|
if (!range) {
|
||||||
|
return {
|
||||||
|
min: code,
|
||||||
|
max: code,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
range.min = Math.min(range.min, code);
|
||||||
|
range.max = Math.max(range.max, code);
|
||||||
|
return range;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ... for UTF-16 code points
|
||||||
|
let utf16Range: Range | undefined;
|
||||||
|
utf16.forEach((str) => {
|
||||||
|
const code = getEmojiCodePoint(str);
|
||||||
|
if (code > startUTF32Pair1 && code < endUTF32Pair) {
|
||||||
|
throw new Error(`UTF16 in UTF32 range: ${str}`);
|
||||||
|
}
|
||||||
|
utf16Range = add(code, utf16Range);
|
||||||
|
});
|
||||||
|
|
||||||
|
// ... for UTF-32 code points
|
||||||
|
let utf32FirstRange: Range | undefined;
|
||||||
|
let utf32SecondRange: Range | undefined;
|
||||||
|
utf32.forEach((str) => {
|
||||||
|
const pair = splitUTF32Number(getEmojiCodePoint(str));
|
||||||
|
if (pair) {
|
||||||
|
utf32FirstRange = add(pair[0], utf32FirstRange);
|
||||||
|
utf32SecondRange = add(pair[1], utf32SecondRange);
|
||||||
|
} else {
|
||||||
|
throw new Error(`Unexpected item in UTF32 set: ${str}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Check UTF-32 emoji ranges
|
||||||
|
expect(utf32FirstRange).toBeDefined();
|
||||||
|
expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1);
|
||||||
|
expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2);
|
||||||
|
|
||||||
|
expect(utf32SecondRange).toBeDefined();
|
||||||
|
expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2);
|
||||||
|
expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair);
|
||||||
|
|
||||||
|
// Dump ranges
|
||||||
|
/*
|
||||||
|
function dump(item: Range | undefined): string {
|
||||||
|
if (!item) {
|
||||||
|
return 'undefined';
|
||||||
|
}
|
||||||
|
return `${item.min} - ${item.max} (0x${item.min
|
||||||
|
.toString(16)
|
||||||
|
.toUpperCase()} - 0x${item.max.toString(16).toUpperCase()})`;
|
||||||
|
}
|
||||||
|
console.log('UTF16:', dump(utf16Range));
|
||||||
|
console.log('UTF32:', dump(utf32FirstRange), dump(utf32SecondRange));
|
||||||
|
*/
|
||||||
|
});
|
||||||
|
});
|
Loading…
Reference in New Issue
Block a user