2
0
mirror of https://github.com/iconify/iconify.git synced 2025-01-06 07:20:40 +00:00

feat(utils): handle keywords when generating regex

This commit is contained in:
Vjacheslav Trushkin 2022-12-22 11:53:00 +02:00
parent a8ce57b7a9
commit 85c12955c0
4 changed files with 72 additions and 18 deletions

View File

@ -1,6 +1,5 @@
import { getEmojiCodePoint } from './convert';
import { emojiComponents, joinerEmoji, vs16Emoji } from './data';
import { getEmojiSequenceKeyword } from './format';
import { joinerEmoji, vs16Emoji } from './data';
/**
* Get emoji sequence from string
@ -18,6 +17,32 @@ export function getEmojiSequenceFromString(value: string): number[] {
.map(getEmojiCodePoint);
}
/**
 * Convert emoji sequence or keyword to a list of code points
 *
 * If value consists only of hexadecimal digits (upper or lower case), dashes
 * and whitespace, like '1f441-fe0f', it is treated as a list of hexadecimal
 * code points and parsed with getEmojiSequenceFromString(), resulting in
 * [0x1f441, 0xfe0f].
 *
 * If value contains anything else, it is treated as text and converted
 * code point by code point. This allows keywords, like ':cat:', to be
 * used where emoji sequences are expected.
 *
 * @param value Emoji sequence as string or keyword
 * @returns List of code points
 */
export function getSequenceFromEmojiStringOrKeyword(value: string): number[] {
	if (/^[0-9a-fA-F-\s]+$/.test(value)) {
		// Looks like a list of hexadecimal numbers
		return getEmojiSequenceFromString(value);
	}

	// Treat as text: iterating a string yields whole code points,
	// so characters outside of BMP are not split into surrogates
	const results: number[] = [];
	for (const char of value) {
		const code = char.codePointAt(0);
		if (code === undefined) {
			// Should not happen: iteration never yields empty strings.
			// Compare against undefined instead of checking truthiness,
			// because 0 is a valid code point
			return getEmojiSequenceFromString(value);
		}
		results.push(code);
	}
	return results;
}
/**
* Split emoji sequence by joiner
*

View File

@ -1,5 +1,3 @@
import { getEmojiUnicodeString, UnicodeFormattingOptions } from '../format';
/**
* Regex in item
*/
@ -90,22 +88,34 @@ export type EmojiItemRegex =
| SetEmojiItemRegex
| OptionalEmojiItemRegex;
/**
 * Options for converting number to string
 *
 * Partial set of `UnicodeFormattingOptions` (type imported from '../format').
 * NOTE(review): exact meaning of each option is defined in '../format',
 * which is not visible here; comments below describe the values only.
 */
const numberToStringOptions: Partial<UnicodeFormattingOptions> = {
// Two characters: backslash followed by "u"
prefix: '\\u',
// Empty separator
separator: '',
// Upper case; presumably applies to hexadecimal digits - confirm
case: 'upper',
// UTF-16; presumably splits code points above 0xFFFF into surrogates - confirm
format: 'utf-16',
// Presumably pads short values with leading zeroes - confirm
add0: true,
};
/**
 * Convert number to upper case hexadecimal string, padded with zeroes
 */
function toPaddedHex(value: number, length: number): string {
	const hex = value.toString(16).toUpperCase();
	return hex.length < length ? '0'.repeat(length - hex.length) + hex : hex;
}

/**
 * Convert number to string that matches it in a regular expression
 *
 * - Printable ASCII digits, letters, `_` and backtick are returned as is
 * - Other printable ASCII characters are escaped with a backslash
 * - Other values below 255 are returned as `\xHH`
 * - Values from 255 to 0xFFFF are returned as `\uHHHH`
 * - Values above 0xFFFF are returned as UTF-16 surrogate pair `\uHHHH\uHHHH`
 *
 * @param number Code point or UTF-16 code unit
 * @returns Escaped string to use in regular expression source
 */
function toString(number: number): string {
	if (number < 255) {
		// Hex or character
		if (number > 32 && number < 127) {
			// Character
			const char = String.fromCharCode(number);
			if (
				// 0-9
				(number > 47 && number < 58) ||
				// A-Z
				(number > 64 && number < 91) ||
				// _`a-z
				(number > 94 && number < 123)
			) {
				return char;
			}

			// Punctuation could have special meaning in regex: escape it
			return '\\' + char;
		}

		// Escape: `\x` must be followed by exactly 2 hexadecimal digits
		return '\\x' + toPaddedHex(number, 2);
	}

	if (number > 0xffff) {
		// `\u` accepts only 4 digits, so anything outside of BMP
		// must be written as a pair of surrogates
		const offset = number - 0x10000;
		return (
			'\\u' +
			toPaddedHex(0xd800 + (offset >> 10), 4) +
			'\\u' +
			toPaddedHex(0xdc00 + (offset & 0x3ff), 4)
		);
	}

	// Unicode: `\u` must be followed by exactly 4 hexadecimal digits,
	// so values below 0x1000 need leading zeroes
	return '\\u' + toPaddedHex(number, 4);
}
/**

View File

@ -1,4 +1,4 @@
import { getEmojiSequenceFromString } from '../cleanup';
import { getSequenceFromEmojiStringOrKeyword } from '../cleanup';
import { convertEmojiSequenceToUTF32 } from '../convert';
import { getQualifiedEmojiVariations } from '../test/variations';
import { createEmojisTree, parseEmojiTree } from './tree';
@ -42,7 +42,9 @@ export function createOptimisedRegex(
): string {
// Convert to numbers
let sequences = emojis.map((item) =>
typeof item === 'string' ? getEmojiSequenceFromString(item) : item
typeof item === 'string'
? getSequenceFromEmojiStringOrKeyword(item)
: item
);
// Add variations

View File

@ -16,6 +16,23 @@ E1.0 grinning face: ${String.fromCodePoint(0x1f600)}
expect(matches?.[1]).toBe(String.fromCodePoint(0x1f603));
});
it('Keywords', () => {
	// Build a global regex from keywords instead of emoji sequences
	const pattern = new RegExp(
		createOptimisedRegex([':cat:', ':gray_cat:', ':tabby_cat:']),
		'g'
	);

	// Sample text contains only 2 of 3 keywords
	const content = `
Cat: :cat:
Tabby cat: :tabby_cat:
`;
	const found = content.match(pattern);

	// Both keywords must be found, in order of appearance
	expect(found?.length).toBe(2);
	expect(found?.[0]).toBe(':cat:');
	expect(found?.[1]).toBe(':tabby_cat:');
});
it('Sequences', () => {
const regexValue = createOptimisedRegex([
// Emoji with optional variation