mirror of
https://github.com/iconify/iconify.git
synced 2024-12-13 14:13:06 +00:00
feat: functions to work with emoji unicode
This commit is contained in:
parent
dbfae68da5
commit
e186953d54
@ -102,6 +102,26 @@
|
||||
"import": "./lib/customisations/rotate.mjs",
|
||||
"types": "./lib/customisations/rotate.d.ts"
|
||||
},
|
||||
"./lib/emoji/cleanup": {
|
||||
"require": "./lib/emoji/cleanup.cjs",
|
||||
"import": "./lib/emoji/cleanup.mjs",
|
||||
"types": "./lib/emoji/cleanup.d.ts"
|
||||
},
|
||||
"./lib/emoji/convert": {
|
||||
"require": "./lib/emoji/convert.cjs",
|
||||
"import": "./lib/emoji/convert.mjs",
|
||||
"types": "./lib/emoji/convert.d.ts"
|
||||
},
|
||||
"./lib/emoji/data": {
|
||||
"require": "./lib/emoji/data.cjs",
|
||||
"import": "./lib/emoji/data.mjs",
|
||||
"types": "./lib/emoji/data.d.ts"
|
||||
},
|
||||
"./lib/emoji/format": {
|
||||
"require": "./lib/emoji/format.cjs",
|
||||
"import": "./lib/emoji/format.mjs",
|
||||
"types": "./lib/emoji/format.d.ts"
|
||||
},
|
||||
"./lib/icon-set/convert-info": {
|
||||
"require": "./lib/icon-set/convert-info.cjs",
|
||||
"import": "./lib/icon-set/convert-info.mjs",
|
||||
|
86
packages/utils/src/emoji/cleanup.ts
Normal file
86
packages/utils/src/emoji/cleanup.ts
Normal file
@ -0,0 +1,86 @@
|
||||
import { convertEmojiSequenceToUTF32, getEmojiCodePoint } from './convert';
|
||||
import { emojiTones, joinerEmoji, vs16Emoji } from './data';
|
||||
|
||||
/**
 * Get emoji sequence from string
 *
 * String must contain hexadecimal code points separated by whitespace
 * and/or dashes, such as '1F441 FE0F 200D 1F5E8 FE0F' or '1f9d1-1f3ff'.
 * Result is passed through `convertEmojiSequenceToUTF32()`, so UTF-16 pairs
 * are merged into UTF-32 numbers.
 *
 * @param value String to parse
 * @returns Sequence of code points, empty array for empty string
 * @throws Error if sequence contains invalid UTF-16 pair
 */
export function getEmojiSequenceFromString(value: string): number[] {
	const codePoints = value
		.trim()
		// Run of separators is one separator: '1F441  FE0F', '1F441 - FE0F'
		.split(/[\s-]+/)
		// Empty chunks (empty input, leading dash) would be parsed as NaN
		.filter((chunk) => chunk !== '')
		// Wrapped in callback because Array.map() passes extra arguments
		.map((chunk) => getEmojiCodePoint(chunk));

	return convertEmojiSequenceToUTF32(codePoints);
}
|
||||
|
||||
/**
 * Split sequence by joiner
 *
 * Joiner itself is not included in results. Result always contains at least
 * one chunk; chunk is empty if sequence starts or ends with joiner, or if
 * two joiners follow each other.
 *
 * @param sequence Sequence of code points
 * @returns List of sequences that were separated by joiner
 */
export function splitEmojiSequences(sequence: number[]): number[][] {
	let current: number[] = [];
	const chunks: number[][] = [current];

	for (const code of sequence) {
		if (code === joinerEmoji) {
			// Start new chunk
			current = [];
			chunks.push(current);
		} else {
			current.push(code);
		}
	}

	return chunks;
}
|
||||
|
||||
/**
 * Join emoji sequences
 *
 * Reverse of `splitEmojiSequences()`: inserts joiner between sequences.
 *
 * @param sequences List of sequences
 * @returns One sequence, new array
 */
export function joinEmojiSequences(sequences: number[][]): number[] {
	const results: number[] = [];

	sequences.forEach((sequence, index) => {
		if (index > 0) {
			results.push(joinerEmoji);
		}
		// Append to the same array instead of copying it with concat()
		for (const code of sequence) {
			results.push(code);
		}
	});

	return results;
}
|
||||
|
||||
/**
 * Remove variations
 *
 * Removes all `vs16Emoji` code points from sequence.
 *
 * @param sequence Sequence of code points
 * @returns New sequence without variation selectors
 */
export function removeEmojiVariations(sequence: number[]): number[] {
	return sequence.filter((code) => code !== vs16Emoji);
}
|
||||
|
||||
/**
 * Remove tones
 *
 * Removes all code points that belong to one of ranges in `emojiTones`.
 *
 * This function should be used with UTF-32 sequence, not UTF-16
 *
 * @param sequence Sequence of code points, UTF-32
 * @returns New sequence without tones
 */
export function removeEmojiTones(sequence: number[]): number[] {
	return sequence.filter(
		(code) =>
			// Range is [minimum, maximum + 1]
			!emojiTones.some(([min, end]) => code >= min && code < end)
	);
}
|
||||
|
||||
/**
 * Callback for `mapEmojiSequences()`: converts one sequence
 */
type MapCallback = (sequence: number[]) => number[];

/**
 * Run function on sequences
 *
 * Intended to be used with functions such as `removeEmojiVariations` or `removeEmojiTones`
 *
 * @param sequences List of sequences
 * @param callback Function to run on each sequence
 * @param removeEmpty If true (default), sequences that are empty after running callback are removed from results
 * @returns New list of sequences
 */
export function mapEmojiSequences(
	sequences: number[][],
	callback: MapCallback,
	removeEmpty = true
): number[][] {
	// Callback is wrapped, so it receives only the sequence, without index
	const mapped = sequences.map((sequence) => callback(sequence));
	if (!removeEmpty) {
		return mapped;
	}
	return mapped.filter((sequence) => sequence.length > 0);
}
|
155
packages/utils/src/emoji/convert.ts
Normal file
155
packages/utils/src/emoji/convert.ts
Normal file
@ -0,0 +1,155 @@
|
||||
import {
|
||||
endUTF32Pair,
|
||||
minUTF32,
|
||||
startUTF32Pair1,
|
||||
startUTF32Pair2,
|
||||
} from './data';
|
||||
|
||||
/**
 * Convert string to number
 *
 * @param code Hexadecimal string, such as '1F49A'
 * @returns Code point, NaN if string cannot be parsed
 */
export function getEmojiCodePoint(code: string): number {
	return parseInt(code, 16);
}
|
||||
|
||||
/**
 * Get first number of UTF-16 pair for UTF-32 code point
 *
 * Caller must make sure that code is not less than `minUTF32`.
 */
function utf32FirstNum(code: number): number {
	// Upper bits of offset, shift already returns integer
	const offset = code - minUTF32;
	return (offset >> 0x0a) + startUTF32Pair1;
}
|
||||
|
||||
/**
 * Get second number of UTF-16 pair for UTF-32 code point
 *
 * Caller must make sure that code is not less than `minUTF32`.
 */
function utf32SecondNum(code: number): number {
	// Lower 10 bits of offset
	const offset = code - minUTF32;
	return (offset & 0x3ff) + startUTF32Pair2;
}
|
||||
|
||||
/**
 * Get UTF-32 as UTF-16 sequence
 *
 * @param code Code point
 * @returns Pair of UTF-16 numbers, undefined if code is less than `minUTF32`, so it does not need to be split
 */
export function splitUTF32Number(code: number): [number, number] | undefined {
	if (code >= minUTF32) {
		return [utf32FirstNum(code), utf32SecondNum(code)];
	}
	return undefined;
}
|
||||
|
||||
/**
 * Check if number is UTF-32 split as UTF-16
 *
 * Returns:
 * - 1 if number fits first number in sequence
 * - 2 if number fits second number in sequence
 * - false on failure
 *
 * @param value Number to check
 */
export function isUTF32SplitNumber(value: number): 1 | 2 | false {
	// startUTF32Pair1 <= value < startUTF32Pair2
	if (value >= startUTF32Pair1 && value < startUTF32Pair2) {
		return 1;
	}

	// startUTF32Pair2 <= value < endUTF32Pair
	if (value >= startUTF32Pair2 && value < endUTF32Pair) {
		return 2;
	}

	return false;
}
|
||||
|
||||
/**
 * Get UTF-16 sequence as UTF-32
 *
 * @param part1 First number of UTF-16 pair
 * @param part2 Second number of UTF-16 pair
 * @returns UTF-32 code point, undefined if numbers are not a valid pair
 */
export function mergeUTF32Numbers(
	part1: number,
	part2: number
): number | undefined {
	// Check ranges. Positive checks are used instead of comparing with
	// limits, so NaN is rejected: all comparisons with NaN are false
	if (isUTF32SplitNumber(part1) !== 1 || isUTF32SplitNumber(part2) !== 2) {
		return undefined;
	}

	// Merge values
	const upper = (part1 - startUTF32Pair1) << 0x0a;
	const lower = part2 - startUTF32Pair2;
	return upper + lower + minUTF32;
}
|
||||
|
||||
/**
 * Convert hexadecimal string or number to unicode
 *
 * @param code Code point as number, or as hexadecimal string
 * @returns String that contains character for that code point
 */
export function getEmojiUnicode(code: number | string): string {
	const codePoint = typeof code === 'number' ? code : getEmojiCodePoint(code);
	return String.fromCodePoint(codePoint);
}
|
||||
|
||||
/**
 * Convert sequence to UTF-16
 *
 * Numbers that are not less than `minUTF32` are replaced with two numbers,
 * other numbers are copied as is.
 *
 * @param numbers Sequence of code points
 * @returns New sequence
 */
export function convertEmojiSequenceToUTF16(numbers: number[]): number[] {
	const results: number[] = [];

	for (const code of numbers) {
		// Returns undefined if number does not need to be split
		const pair = splitUTF32Number(code);
		if (pair) {
			results.push(pair[0], pair[1]);
		} else {
			results.push(code);
		}
	}

	return results;
}
|
||||
|
||||
/**
 * Convert sequence to UTF-32
 *
 * Pairs of UTF-16 numbers are merged, other numbers are copied as is.
 *
 * @param numbers Sequence of code points, can be mix of UTF-16 and UTF-32
 * @param throwOnError If true (default), throws error when part of UTF-16 pair cannot be merged. If false, such number is copied as is
 * @returns New sequence
 * @throws Error if sequence contains invalid UTF-16 pair and `throwOnError` is enabled
 */
export function convertEmojiSequenceToUTF32(
	numbers: number[],
	throwOnError = true
): number[] {
	const results: number[] = [];

	for (let i = 0; i < numbers.length; i++) {
		const code = numbers[i];

		if (code >= minUTF32) {
			// Already UTF-32
			results.push(code);
			continue;
		}

		const part = isUTF32SplitNumber(code);
		if (!part) {
			// Nothing to convert
			results.push(code);
			continue;
		}

		// UTF-32 code as 2 part sequence: must be first part, followed
		// by second part
		if (part === 1 && i + 1 < numbers.length) {
			const merged = mergeUTF32Numbers(code, numbers[i + 1]);
			if (merged !== undefined) {
				// Success: skip second part
				results.push(merged);
				i++;
				continue;
			}
		}

		// Failed to merge UTF-32 sequence
		if (throwOnError) {
			const nextCode = numbers[i + 1];
			const nextText =
				nextCode === undefined ? 'undefined' : nextCode.toString(16);
			throw new Error(
				`Invalid UTF-16 sequence: ${code.toString(16)}-${nextText}`
			);
		}
		results.push(code);
	}

	return results;
}
|
40
packages/utils/src/emoji/data.ts
Normal file
40
packages/utils/src/emoji/data.ts
Normal file
@ -0,0 +1,40 @@
|
||||
/**
 * Various codes
 */

// Joiner in emoji sequences
export const joinerEmoji = 0x200d;

// Emoji as icon
export const vs16Emoji = 0xfe0f;

// Keycap, preceded by mandatory VS16 for full emoji
export const keycapEmoji = 0x20e3;
|
||||
|
||||
/**
 * Ranges of tones, UTF-32
 *
 * First value in array is minimum, second value is maximum+1
 */
type Range = [number, number];

export const emojiTones: Range[] = [
	// Skin tones
	[0x1f3fb, 0x1f400],
	// Hair tones
	[0x1f9b0, 0x1f9b4],
];
|
||||
|
||||
/**
 * Minimum UTF-32 number
 *
 * Numbers below this value are not split into UTF-16 pairs
 */
export const minUTF32 = 0x10000;
|
||||
|
||||
/**
 * Codes for UTF-32 characters presented as UTF-16
 *
 * startUTF32Pair1 <= code < startUTF32Pair2 -> code for first character in pair
 * startUTF32Pair2 <= code < endUTF32Pair -> code for second character in pair
 */
export const startUTF32Pair1 = 0xd800;
export const startUTF32Pair2 = 0xdc00;
export const endUTF32Pair = 0xe000;
|
131
packages/utils/src/emoji/format.ts
Normal file
131
packages/utils/src/emoji/format.ts
Normal file
@ -0,0 +1,131 @@
|
||||
import {
|
||||
convertEmojiSequenceToUTF16,
|
||||
convertEmojiSequenceToUTF32,
|
||||
} from './convert';
|
||||
|
||||
/**
 * Options for converting code points to string
 */
interface UnicodeOptions {
	// Prefix before each character '\\u'
	prefix: string;

	// Separator between characters
	separator: string;

	// Case conversion
	case: 'upper' | 'lower';

	// UTF conversion
	format: 'utf-32' | 'utf-16';

	// Add '0' for code shorter than 4 letters
	add0: boolean;

	// Throw on error
	throwOnError: boolean;
}

/**
 * Default options for one code point: no prefix, no separator,
 * lower case UTF-32 without padding
 */
const defaultUnicodeOptions: UnicodeOptions = {
	prefix: '',
	separator: '',
	case: 'lower',
	format: 'utf-32',
	add0: false,
	throwOnError: true,
};
|
||||
|
||||
/**
 * Convert sequence of numbers to string
 *
 * Sequence is converted to format set in options, then each number is
 * converted to hexadecimal string and strings are joined with separator.
 *
 * `throwOnError` option is used only for converting to UTF-32.
 *
 * @param sequence Sequence of code points
 * @param options Full set of options
 * @returns Formatted string, empty string for empty sequence
 */
function convert(sequence: number[], options: UnicodeOptions): string {
	const { prefix, separator, add0 } = options;
	const upperCase = options.case === 'upper';

	const cleanSequence =
		options.format === 'utf-16'
			? convertEmojiSequenceToUTF16(sequence)
			: convertEmojiSequenceToUTF32(sequence, options.throwOnError);

	return cleanSequence
		.map((code) => {
			let str = code.toString(16);
			if (add0) {
				// Does nothing if string already has 4 or more letters
				str = str.padStart(4, '0');
			}
			return prefix + (upperCase ? str.toUpperCase() : str.toLowerCase());
		})
		.join(separator);
}
|
||||
|
||||
/**
 * Convert unicode number to string
 *
 * @param code Code point
 * @param options Options, merged with defaults: no prefix, no separator, lower case, UTF-32
 * @returns Hexadecimal string
 */
export function getEmojiUnicodeString(
	code: number,
	options: Partial<UnicodeOptions> = {}
): string {
	const fullOptions: UnicodeOptions = {
		...defaultUnicodeOptions,
		...options,
	};
	return convert([code], fullOptions);
}
|
||||
|
||||
/**
 * Default options for sequence: same as for one code point,
 * but numbers are separated with '-'
 */
const defaultSequenceOptions: UnicodeOptions = {
	...defaultUnicodeOptions,
	separator: '-',
};

/**
 * Convert unicode numbers sequence to string
 *
 * @param sequence Sequence of code points
 * @param options Options, merged with defaults: '-' as separator, lower case, UTF-32
 * @returns String, such as '1f441-fe0f-200d-1f5e8-fe0f'
 */
export function getEmojiSequenceString(
	sequence: number[],
	options: Partial<UnicodeOptions> = {}
): string {
	const fullOptions: UnicodeOptions = {
		...defaultSequenceOptions,
		...options,
	};
	return convert(sequence, fullOptions);
}
|
||||
|
||||
/**
 * Options for regular expression: upper case UTF-16, each number
 * is prefixed with '\u'
 *
 * NOTE(review): `add0` is disabled, so numbers shorter than 4 letters are
 * not padded: 0x23 is converted to '\u23', not '\u0023'. Confirm that code
 * that uses result expects short form.
 */
const regexOptions: UnicodeOptions = {
	prefix: '\\u',
	separator: '',
	case: 'upper',
	format: 'utf-16',
	add0: false,
	throwOnError: true,
};

/**
 * Merge unicode numbers sequence as regex
 *
 * @param sequence Sequence of code points
 * @param throwOnError Value for `throwOnError` option, enabled by default
 * @returns String, such as '\uD83D\uDC41\uFE0F'
 */
export function emojiSequenceToRegex(
	sequence: number[],
	throwOnError = true
): string {
	const options: UnicodeOptions = {
		...regexOptions,
		throwOnError,
	};
	return convert(sequence, options);
}
|
||||
|
||||
/**
 * Options for icon keyword: lower case UTF-32, numbers are padded
 * to 4 letters and separated with '-'
 */
const keywordOptions: UnicodeOptions = {
	prefix: '',
	separator: '-',
	case: 'lower',
	format: 'utf-32',
	add0: true,
	throwOnError: true,
};

/**
 * Merge unicode numbers sequence as icon keyword
 *
 * @param sequence Sequence of code points
 * @param throwOnError If true (default), throws error if sequence contains invalid UTF-16 pair
 * @returns String, such as '0023-fe0f-20e3'
 */
export function emojiSequenceToKeyword(
	sequence: number[],
	throwOnError = true
): string {
	const options: UnicodeOptions = {
		...keywordOptions,
		throwOnError,
	};
	return convert(sequence, options);
}
|
87
packages/utils/tests/emoji-cleanup-test.ts
Normal file
87
packages/utils/tests/emoji-cleanup-test.ts
Normal file
@ -0,0 +1,87 @@
|
||||
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||
import {
|
||||
getEmojiSequenceFromString,
|
||||
joinEmojiSequences,
|
||||
mapEmojiSequences,
|
||||
removeEmojiTones,
|
||||
removeEmojiVariations,
|
||||
splitEmojiSequences,
|
||||
} from '../lib/emoji/cleanup';
|
||||
|
||||
// Tests for functions that parse, split, join and clean up emoji sequences
describe('Testing formatting emoji cleanup', () => {
	it('UTF-32 sequence', () => {
		// Convert from string: space as separator, whitespace at the end
		const sequence = getEmojiSequenceFromString(
			'1F441 FE0F 200D 1F5E8 FE0F '
		);
		expect(sequence).toEqual([0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);

		// Split
		const split = splitEmojiSequences(sequence);
		expect(split).toEqual([
			[0x1f441, 0xfe0f],
			[0x1f5e8, 0xfe0f],
		]);

		// Join again
		expect(joinEmojiSequences(split)).toEqual(sequence);

		// Remove variations
		expect(removeEmojiVariations(sequence)).toEqual([
			0x1f441, 0x200d, 0x1f5e8,
		]);
		expect(mapEmojiSequences(split, removeEmojiVariations)).toEqual([
			[0x1f441],
			[0x1f5e8],
		]);

		// Remove tones (does nothing for this sequence)
		expect(removeEmojiTones(sequence)).toEqual(sequence);
		expect(mapEmojiSequences(split, removeEmojiTones)).toEqual(split);
	});

	it('UTF-32 sequence with tones', () => {
		// Convert from string: dash as separator, lower case
		const sequence = getEmojiSequenceFromString(
			'1f9d1-1f3ff-200d-1f91d-200d-1f9d1-1f3ff'
		);
		expect(sequence).toEqual([
			0x1f9d1, 0x1f3ff, 0x200d, 0x1f91d, 0x200d, 0x1f9d1, 0x1f3ff,
		]);

		// Split
		const split = splitEmojiSequences(sequence);
		expect(split).toEqual([
			[0x1f9d1, 0x1f3ff],
			[0x1f91d],
			[0x1f9d1, 0x1f3ff],
		]);

		// Join again
		expect(joinEmojiSequences(split)).toEqual(sequence);

		// Remove variations (does nothing for this sequence)
		expect(removeEmojiVariations(sequence)).toEqual(sequence);
		expect(mapEmojiSequences(split, removeEmojiVariations)).toEqual(split);

		// Remove tones
		expect(removeEmojiTones(sequence)).toEqual([
			0x1f9d1, 0x200d, 0x1f91d, 0x200d, 0x1f9d1,
		]);
		expect(mapEmojiSequences(split, removeEmojiTones)).toEqual([
			[0x1f9d1],
			[0x1f91d],
			[0x1f9d1],
		]);

		// Skin tones, mixed separators (bad emoji, second chunk only has
		// tone without emoji, so it is empty after removing tones)
		const sequence2 = getEmojiSequenceFromString('1F471 1F3FC-200D 1F3FF');
		expect(sequence2).toEqual([0x1f471, 0x1f3fc, 0x200d, 0x1f3ff]);
		const split2 = splitEmojiSequences(sequence2);

		expect(removeEmojiTones(sequence2)).toEqual([0x1f471, 0x200d]);
		expect(mapEmojiSequences(split2, removeEmojiTones)).toEqual([
			[0x1f471],
		]);
	});
});
|
135
packages/utils/tests/emoji-convert-test.ts
Normal file
135
packages/utils/tests/emoji-convert-test.ts
Normal file
@ -0,0 +1,135 @@
|
||||
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||
import {
|
||||
getEmojiCodePoint,
|
||||
getEmojiUnicode,
|
||||
splitUTF32Number,
|
||||
isUTF32SplitNumber,
|
||||
mergeUTF32Numbers,
|
||||
convertEmojiSequenceToUTF16,
|
||||
convertEmojiSequenceToUTF32,
|
||||
} from '../lib/emoji/convert';
|
||||
|
||||
// Tests for converting code points between strings, UTF-16 and UTF-32
describe('Testing emoji code points', () => {
	it('UTF-16', () => {
		// Convert to number
		const codePoint = getEmojiCodePoint('2764');
		expect(codePoint).toBe(parseInt('2764', 16));

		// Check for UTF-32
		expect(splitUTF32Number(codePoint)).toBeUndefined();
		expect(isUTF32SplitNumber(codePoint)).toBe(false);

		// Convert to character
		expect(getEmojiUnicode(codePoint)).toBe('\u2764');
	});

	it('UTF-32', () => {
		// Convert to number
		const codePoint = getEmojiCodePoint('1F49A');
		expect(codePoint).toBe(parseInt('1F49A', 16));
		expect(isUTF32SplitNumber(codePoint)).toBe(false);

		// Convert to UTF-16 sequence
		const sequence = splitUTF32Number(codePoint);
		expect(sequence).toEqual([55357, 56474]);
		expect(isUTF32SplitNumber(sequence![0])).toBe(1);
		expect(isUTF32SplitNumber(sequence![1])).toBe(2);

		// Convert back to UTF-32
		expect(mergeUTF32Numbers(...sequence!)).toBe(codePoint);

		// Convert to string
		expect(getEmojiUnicode(codePoint)).toBe('\uD83D\uDC9A');
	});

	it('Sequences to UTF-16', () => {
		// Nothing to convert
		expect(convertEmojiSequenceToUTF16([])).toEqual([]);
		expect(convertEmojiSequenceToUTF16([0x263a, 0xfe0f])).toEqual([
			0x263a, 0xfe0f,
		]);

		// UTF-32
		expect(
			convertEmojiSequenceToUTF16([
				0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f,
			])
		).toEqual([0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f]);
	});

	it('Sequences to UTF-32', () => {
		// Nothing to convert
		expect(convertEmojiSequenceToUTF32([])).toEqual([]);
		expect(
			convertEmojiSequenceToUTF32([
				0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f,
			])
		).toEqual([0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);

		// UTF-16
		expect(
			convertEmojiSequenceToUTF32([
				0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
			])
		).toEqual([0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);

		// Bad UTF-16: first character is wrong
		expect(
			convertEmojiSequenceToUTF32(
				[0xa83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f],
				false
			)
		).toEqual([0xa83d, 0xdc41, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
		expect(() =>
			convertEmojiSequenceToUTF32([
				0xa83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
			])
		).toThrow();

		// Bad UTF-16: second character is wrong
		expect(
			convertEmojiSequenceToUTF32(
				[0xd83d, 0xec41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f],
				false
			)
		).toEqual([0xd83d, 0xec41, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]);
		expect(() =>
			convertEmojiSequenceToUTF32([
				0xd83d, 0xec41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
			])
		).toThrow();

		// Bad UTF-16: unexpected end
		expect(
			convertEmojiSequenceToUTF32(
				[0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d],
				false
			)
		).toEqual([0x1f441, 0xfe0f, 0x200d, 0xd83d]);
		expect(() =>
			convertEmojiSequenceToUTF32([
				0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d,
			])
		).toThrow();
	});
});
|
55
packages/utils/tests/emoji-format-test.ts
Normal file
55
packages/utils/tests/emoji-format-test.ts
Normal file
@ -0,0 +1,55 @@
|
||||
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||
import {
|
||||
getEmojiSequenceString,
|
||||
emojiSequenceToRegex,
|
||||
emojiSequenceToKeyword,
|
||||
} from '../lib/emoji/format';
|
||||
|
||||
// Tests for converting sequences to strings, regular expressions and keywords
describe('Testing formatting emoji code points', () => {
	it('Empty sequence', () => {
		const sequence: number[] = [];
		expect(getEmojiSequenceString(sequence)).toBe('');
		expect(emojiSequenceToRegex(sequence)).toBe('');
		expect(emojiSequenceToKeyword(sequence)).toBe('');
	});

	it('Keycap sequence', () => {
		const sequence: number[] = [0x23, 0xfe0f, 0x20e3];

		// Only keyword pads short code with '0'
		expect(getEmojiSequenceString(sequence)).toBe('23-fe0f-20e3');
		expect(emojiSequenceToRegex(sequence)).toBe('\\u23\\uFE0F\\u20E3');
		expect(emojiSequenceToKeyword(sequence)).toBe('0023-fe0f-20e3');
	});

	it('UTF-16 sequence', () => {
		const sequence: number[] = [
			0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
		];

		// UTF-32 = 0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f
		expect(getEmojiSequenceString(sequence)).toBe(
			'1f441-fe0f-200d-1f5e8-fe0f'
		);
		expect(emojiSequenceToRegex(sequence)).toBe(
			'\\uD83D\\uDC41\\uFE0F\\u200D\\uD83D\\uDDE8\\uFE0F'
		);
		expect(emojiSequenceToKeyword(sequence)).toBe(
			'1f441-fe0f-200d-1f5e8-fe0f'
		);
	});

	it('UTF-32 sequence', () => {
		const sequence: number[] = [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f];

		// UTF-16 = 0xd83d, 0xdc41, 0xfe0f, 0x200d, 0xd83d, 0xdde8, 0xfe0f,
		expect(getEmojiSequenceString(sequence)).toBe(
			'1f441-fe0f-200d-1f5e8-fe0f'
		);
		expect(emojiSequenceToRegex(sequence)).toBe(
			'\\uD83D\\uDC41\\uFE0F\\u200D\\uD83D\\uDDE8\\uFE0F'
		);
		expect(emojiSequenceToKeyword(sequence)).toBe(
			'1f441-fe0f-200d-1f5e8-fe0f'
		);
	});
});
|
5024
packages/utils/tests/fixtures/download-emoji-15.0.txt
vendored
Normal file
5024
packages/utils/tests/fixtures/download-emoji-15.0.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
176
packages/utils/tests/validate-emoji-unicode-test.ts
Normal file
176
packages/utils/tests/validate-emoji-unicode-test.ts
Normal file
@ -0,0 +1,176 @@
|
||||
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||
import { readFile, writeFile, unlink } from 'node:fs/promises';
|
||||
import { getEmojiCodePoint, splitUTF32Number } from '../lib/emoji/convert';
|
||||
import {
|
||||
startUTF32Pair1,
|
||||
startUTF32Pair2,
|
||||
endUTF32Pair,
|
||||
} from '../lib/emoji/data';
|
||||
|
||||
// Emoji types: value after ';' in lines of emoji test file
type EmojiType =
	| 'component'
	| 'fully-qualified'
	| 'minimally-qualified'
	| 'unqualified';
const componentType: EmojiType = 'component';

// Allowed types, in order of conversion
const allowedTypes: Set<EmojiType> = new Set([
	componentType,
	'fully-qualified',
	'minimally-qualified',
	'unqualified',
]);
|
||||
|
||||
// Checks that ranges in emoji/data match code points of all emojis
// listed in emoji test file
describe('Testing emoji code points', () => {
	it('Checking available ranges', async () => {
		// Fetch emojis, cache it
		const version = '15.0';
		const source = `tests/fixtures/download-emoji-${version}.txt`;

		let data: string | undefined;
		try {
			data = await readFile(source, 'utf8');
		} catch {
			// Cache cannot be read: file is downloaded below
		}

		if (!data) {
			data = (
				await (
					await fetch(
						`https://unicode.org/Public/emoji/${version}/emoji-test.txt`
					)
				).text()
			).toString();
			await writeFile(source, data, 'utf8');
		}

		// Test content, unlink cache on failure
		// Test ends without checking anything if content is not valid
		if (data.indexOf(`# Version: ${version}`) === -1) {
			try {
				await unlink(source);
			} catch {
				// Failed to remove cache: nothing to do
			}
			return;
		}

		// Get all emojis
		const utf16: Set<string> = new Set();
		const utf32: Set<string> = new Set();
		data.split('\n').forEach((rawLine) => {
			// Line format: 'code points ; type # comment'
			const parts = rawLine.trim().split('#');
			if (parts.length < 2) {
				return;
			}

			// Get code and type from first chunk
			const firstChunk = parts[0].trim();
			if (!firstChunk) {
				// Empty first chunk: a comment
				return;
			}
			const firstChunkParts = firstChunk.split(';');
			if (firstChunkParts.length !== 2) {
				return;
			}
			const text = firstChunkParts[0].trim();
			const code = text.toLowerCase().replace(/\s+/g, '-');
			if (!code) {
				return;
			}
			const type = firstChunkParts[1].trim() as EmojiType;
			if (!allowedTypes.has(type)) {
				throw new Error(`Bad emoji type: ${type}`);
			}

			// Add code: set is selected by number of letters in code
			code.split('-').forEach((chunk) => {
				switch (chunk.length) {
					case 2:
					case 4:
						utf16.add(chunk);
						break;

					case 5:
						utf32.add(chunk);
						break;

					default:
						throw new Error(`Bad emoji code: ${text}`);
				}
			});
		});

		// Code points should not be empty
		expect(utf16.size).toBeGreaterThan(0);
		expect(utf32.size).toBeGreaterThan(0);

		// Get min/max values
		interface Range {
			min: number;
			max: number;
		}

		// Extends range to include code, creates range if it does not exist
		function add(code: number, range: Range | undefined): Range {
			if (!range) {
				return {
					min: code,
					max: code,
				};
			}
			range.min = Math.min(range.min, code);
			range.max = Math.max(range.max, code);
			return range;
		}

		// ... for UTF-16 code points
		// Range is used only in commented out code below
		let utf16Range: Range | undefined;
		utf16.forEach((str) => {
			const code = getEmojiCodePoint(str);
			// Range of pairs includes startUTF32Pair1, see emoji/data
			if (code >= startUTF32Pair1 && code < endUTF32Pair) {
				throw new Error(`UTF16 in UTF32 range: ${str}`);
			}
			utf16Range = add(code, utf16Range);
		});

		// ... for UTF-32 code points
		let utf32FirstRange: Range | undefined;
		let utf32SecondRange: Range | undefined;
		utf32.forEach((str) => {
			const pair = splitUTF32Number(getEmojiCodePoint(str));
			if (pair) {
				utf32FirstRange = add(pair[0], utf32FirstRange);
				utf32SecondRange = add(pair[1], utf32SecondRange);
			} else {
				throw new Error(`Unexpected item in UTF32 set: ${str}`);
			}
		});

		// Check UTF-32 emoji ranges
		expect(utf32FirstRange).toBeDefined();
		expect(utf32FirstRange!.min).toBeGreaterThanOrEqual(startUTF32Pair1);
		expect(utf32FirstRange!.max).toBeLessThan(startUTF32Pair2);

		expect(utf32SecondRange).toBeDefined();
		expect(utf32SecondRange!.min).toBeGreaterThanOrEqual(startUTF32Pair2);
		expect(utf32SecondRange!.max).toBeLessThan(endUTF32Pair);

		// Dump ranges
		/*
		function dump(item: Range | undefined): string {
			if (!item) {
				return 'undefined';
			}
			return `${item.min} - ${item.max} (0x${item.min
				.toString(16)
				.toUpperCase()} - 0x${item.max.toString(16).toUpperCase()})`;
		}
		console.log('UTF16:', dump(utf16Range));
		console.log('UTF32:', dump(utf32FirstRange), dump(utf32SecondRange));
		*/
	});
});
|
Loading…
Reference in New Issue
Block a user