2022-12-22 11:53:00 +02:00
|
|
|
import { getSequenceFromEmojiStringOrKeyword } from '../cleanup';
|
2022-12-09 20:10:47 +02:00
|
|
|
import { convertEmojiSequenceToUTF32 } from '../convert';
|
2022-12-16 10:48:17 +02:00
|
|
|
import { getQualifiedEmojiVariations } from '../test/variations';
|
2022-12-09 20:10:47 +02:00
|
|
|
import { createEmojisTree, parseEmojiTree } from './tree';
|
|
|
|
|
|
|
|
/**
 * Build an optimised regular expression for a list of emoji sequences
 *
 * Every sequence is passed through convertEmojiSequenceToUTF32(), the
 * converted list is turned into a tree with createEmojisTree(), and that
 * tree is handed to parseEmojiTree(). The `regex` property of the parsed
 * result is what gets returned.
 *
 * @param sequences - Emoji sequences, each one a list of numbers
 * @returns Regular expression as a string
 */
export function createOptimisedRegexForEmojiSequences(
	sequences: number[][]
): string {
	// Convert each sequence to UTF-32; the array passed by the caller is left as is
	const utf32Sequences = sequences.map((codePoints) =>
		convertEmojiSequenceToUTF32(codePoints)
	);

	// Build the tree, optimise it and keep only the resulting regex
	const { regex } = parseEmojiTree(createEmojisTree(utf32Sequences));
	return regex;
}
|
|
|
|
|
|
|
|
/**
 * Create optimised regex for emojis
 *
 * Each entry of the `emojis` array is either a list of code points or an
 * emoji sequence written as a string. String entries are converted with
 * getSequenceFromEmojiStringOrKeyword(), arrays are used as is.
 *
 * Examples of acceptable strings (case insensitive):
 *  '1F636 200D 1F32B FE0F' - space separated UTF32 sequence
 *  '1f636-200d-1f32b-fe0f' - dash separated UTF32 sequence
 *  'd83d-de36-200d-d83c-df2b-fe0f' - dash separated UTF16 sequence
 *  '\\uD83D\\uDE36\\u200D\\uD83C\\uDF2B\\uFE0F' - UTF16 sequence escaped with '\\u'
 *
 * All examples above refer to the same emoji and will generate the same regex result
 *
 * After conversion, the list is run through getQualifiedEmojiVariations()
 * to add variations, then passed to createOptimisedRegexForEmojiSequences().
 *
 * @param emojis - Emojis as strings or as lists of numbers
 * @returns Regular expression as a string
 */
export function createOptimisedRegex(emojis: (string | number[])[]): string {
	// Convert strings to numbers and wrap each sequence in a temporary
	// object with 'sequence' property, which is the shape passed to
	// getQualifiedEmojiVariations()
	const wrapped = emojis.map((entry) => {
		let sequence: number[];
		if (typeof entry === 'string') {
			sequence = getSequenceFromEmojiStringOrKeyword(entry);
		} else {
			sequence = entry;
		}
		return { sequence };
	});

	// Add variations, then unwrap back to plain sequences
	const withVariations = getQualifiedEmojiVariations(wrapped).map(
		({ sequence }) => sequence
	);

	// Parse
	return createOptimisedRegexForEmojiSequences(withVariations);
}
|