fix: various fixes for emojis, functions to find and replace emojis in text

2024-11-09 23:00:56 +00:00 · 2022-12-14 20:49:50 +02:00 · 2022-12-14 20:49:50 +02:00 · 9f5be0f10d
commit 9f5be0f10d
parent 0410d73067
13 changed files with 643 additions and 88 deletions
--- a/packages/utils/package.json
+++ b/packages/utils/package.json
@ -127,6 +127,16 @@
 			"import": "./lib/emoji/regex/tree.mjs",
 			"types": "./lib/emoji/regex/tree.d.ts"
 		},
+		"./lib/emoji/replace/find": {
+			"require": "./lib/emoji/replace/find.cjs",
+			"import": "./lib/emoji/replace/find.mjs",
+			"types": "./lib/emoji/replace/find.d.ts"
+		},
+		"./lib/emoji/replace/replace": {
+			"require": "./lib/emoji/replace/replace.cjs",
+			"import": "./lib/emoji/replace/replace.mjs",
+			"types": "./lib/emoji/replace/replace.d.ts"
+		},
 		"./lib/emoji/cleanup": {
 			"require": "./lib/emoji/cleanup.cjs",
 			"import": "./lib/emoji/cleanup.mjs",
--- a/packages/utils/src/emoji/cleanup.ts
+++ b/packages/utils/src/emoji/cleanup.ts
@ -1,8 +1,14 @@
 import { getEmojiCodePoint } from './convert';
 import { emojiTones, joinerEmoji, vs16Emoji } from './data';
+import { getEmojiSequenceString } from './format';

 /**
 * Get emoji sequence from string
+ *
+ * Examples (shows same emoji sequence formatted differently):
+ *  '1F441 FE0F 200D 1F5E8 FE0F' => [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]
+ *  '1f441-fe0f-200d-1f5e8-fe0f' => [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]
+ *  '\\uD83D\\uDC41\\uFE0F\\u200D\\uD83D\\uDDE8\\uFE0F' => [0x1f441, 0xfe0f, 0x200d, 0x1f5e8, 0xfe0f]
 */
 export function getEmojiSequenceFromString(value: string): number[] {
 	return value
@ -13,7 +19,12 @@ export function getEmojiSequenceFromString(value: string): number[] {
 }

 /**
- * Split sequence by joiner
+ * Split emoji sequence by joiner
+ *
+ * Result represents one emoji, split in smaller sequences separated by 0x200D
+ *
+ * Example:
+ * 	[0x1FAF1, 0x1F3FB, 0x200D, 0x1FAF2, 0x1F3FC] => [[0x1FAF1, 0x1F3FB], [0x1FAF2, 0x1F3FC]]
 */
 export function splitEmojiSequences(sequence: number[]): number[][] {
 	const results: number[][] = [];
@ -33,6 +44,11 @@ export function splitEmojiSequences(sequence: number[]): number[][] {

 /**
 * Join emoji sequences
+ *
+ * Parameter represents one emoji, split in smaller sequences
+ *
+ * Example:
+ * 	[[0x1FAF1, 0x1F3FB], [0x1FAF2, 0x1F3FC]] => [0x1FAF1, 0x1F3FB, 0x200D, 0x1FAF2, 0x1F3FC]
 */
 export function joinEmojiSequences(sequences: number[][]): number[] {
 	let results: number[] = [];
@ -69,7 +85,19 @@ export function removeEmojiTones(sequence: number[]): number[] {
 	});
 }

+/**
+ * Types for mapEmojiSequence()
+ */
 type MapCallback = (sequence: number[]) => number[];
+interface MapOptions {
+	removeEmpty?: boolean;
+	removeDuplicates?: boolean;
+}
+
+const mapOptions: Required<MapOptions> = {
+	removeEmpty: true,
+	removeDuplicates: false,
+};

 /**
 * Run function on sequences
@ -79,10 +107,35 @@ type MapCallback = (sequence: number[]) => number[];
 export function mapEmojiSequences(
 	sequences: number[][],
 	callback: MapCallback,
-	removeEmpty = true
+	options: MapOptions = {}
 ): number[][] {
-	const results = sequences.map((sequence) => callback(sequence));
-	return removeEmpty
-		? results.filter((sequence) => sequence.length > 0)
-		: results;
+	const fullOptions = {
+		...mapOptions,
+		...options,
+	};
+	const values: Set<string> = new Set();
+	const results: number[][] = [];
+
+	sequences.forEach((sequence) => {
+		const result = callback(sequence);
+
+		// Check for empty sequences
+		if (fullOptions.removeEmpty && !result.length) {
+			return;
+		}
+
+		// Check for duplicate
+		if (fullOptions.removeDuplicates) {
+			const value = getEmojiSequenceString(result);
+			if (values.has(value)) {
+				// duplicate
+				return;
+			}
+			values.add(value);
+		}
+
+		results.push(result);
+	});
+
+	return results;
 }
--- a/packages/utils/src/emoji/format.ts
+++ b/packages/utils/src/emoji/format.ts
@ -60,6 +60,9 @@ function convert(

 /**
 * Convert unicode number to string
+ *
+ * Example:
+ * 	0x1F600 => '1F600'
 */
 export function getEmojiUnicodeString(
 	code: number,
@ -78,6 +81,9 @@ const defaultSequenceOptions: UnicodeFormattingOptions = {

 /**
 * Convert unicode numbers sequence to string
+ *
+ * Example:
+ * 	[0x1f441, 0xfe0f] => '1f441-fe0f'
 */
 export function getEmojiSequenceString(
 	sequence: number[],
@ -88,25 +94,3 @@ export function getEmojiSequenceString(
 		...options,
 	});
 }
-
-const keywordOptions: UnicodeFormattingOptions = {
-	prefix: '',
-	separator: '-',
-	case: 'lower',
-	format: 'utf-32',
-	add0: true,
-	throwOnError: true,
-};
-
-/**
- * Merge unicode numbers sequence as icon keyword
- */
-export function emojiSequenceToKeyword(
-	sequence: number[],
-	throwOnError = true
-): string {
-	return convert(sequence, {
-		...keywordOptions,
-		throwOnError,
-	});
-}
--- a/packages/utils/src/emoji/parse-test.ts
+++ b/packages/utils/src/emoji/parse-test.ts
@ -1,5 +1,7 @@
 import { getEmojiSequenceFromString } from './cleanup';
 import { convertEmojiSequenceToUTF32 } from './convert';
+import { getEmojiSequenceString } from './format';
+import { getUnqualifiedEmojiSequence } from './variations';

 // Emoji types
 type EmojiType =
@ -62,3 +64,50 @@ export function parseEmojiTestFile(data: string): number[][] {
 		convertEmojiSequenceToUTF32(getEmojiSequenceFromString(item))
 	);
 }
+
+/**
+ * Get qualified variations from parsed test file
+ *
+ * Key is unqualified emoji, as returned by getUnqualifiedEmojiSequence(),
+ * value is the longest variation of that emoji found in `sequences`.
+ * Length is compared on converted strings, not on number of code points.
+ *
+ * Without `toString`, sequences are converted with getEmojiSequenceString()
+ * and result is returned as Map, where keys and values are sequences.
+ * With `toString`, that callback is used to convert keys and values, result
+ * is returned as plain object, where keys and values are strings.
+ */
+export function getQualifiedEmojiSequencesMap(
+	sequences: number[][]
+): Map<number[], number[]>;
+export function getQualifiedEmojiSequencesMap(
+	sequences: number[][],
+	toString: (value: number[]) => string
+): Record<string, string>;
+export function getQualifiedEmojiSequencesMap(
+	sequences: number[][],
+	toString?: (value: number[]) => string
+): Map<number[], number[]> | Record<string, string> {
+	const convert = toString || getEmojiSequenceString;
+	const results = Object.create(null) as Record<string, string>;
+
+	for (let i = 0; i < sequences.length; i++) {
+		const value = convert(sequences[i]);
+		const unqualified = convert(getUnqualifiedEmojiSequence(sequences[i]));
+		// Store value if key is new or value is longer than the stored one
+		if (
+			!results[unqualified] ||
+			results[unqualified].length < value.length
+		) {
+			results[unqualified] = value;
+		}
+	}
+
+	// Custom converter: return strings as is, otherwise parse them back to sequences
+	if (toString) {
+		return results;
+	}
+
+	const map: Map<number[], number[]> = new Map();
+	for (const key in results) {
+		const value = results[key];
+		map.set(
+			getEmojiSequenceFromString(key),
+			getEmojiSequenceFromString(value)
+		);
+	}
+	return map;
+}
--- a/packages/utils/src/emoji/replace/find.ts
+++ b/packages/utils/src/emoji/replace/find.ts
@ -0,0 +1,176 @@
+import { convertEmojiSequenceToUTF32 } from '../convert';
+import { vs16Emoji } from '../data';
+import { getEmojiSequenceString } from '../format';
+
+/**
+ * Create regular expression instance
+ *
+ * Instance is created with global flag, which is required for
+ * String.prototype.match() to return all matches instead of only first one
+ */
+export function createEmojiRegExp(regexp: string): RegExp {
+	return new RegExp(regexp, 'g');
+}
+
+/**
+ * One unique emoji found in text, returned by getEmojiMatchesInText()
+ */
+export interface EmojiRegexMatch {
+	// Exact string found in text, this is what gets replaced
+	match: string;
+
+	// Code points of `match`, without variation selector (`vs16Emoji`)
+	sequence: number[];
+
+	// Icon name: `sequence` converted with getEmojiSequenceString()
+	keyword: string;
+}
+
+/**
+ * Match with content that precedes it
+ */
+interface PrevMatch {
+	// Match
+	match: EmojiRegexMatch;
+
+	// Content between previous emoji (or start of content) and this emoji
+	prev: string;
+}
+
+interface PrevNextMatch extends PrevMatch {
+	// Content between this emoji and next emoji (or end of content)
+	next: string;
+}
+
+/**
+ * Find emojis in text
+ *
+ * Returns only one entry per match, longest matches first
+ *
+ * `regexp` can be source of regular expression or RegExp instance. Instance
+ * without global flag is cloned with global flag added, because without it
+ * String.prototype.match() returns only first match and its capture groups.
+ *
+ * Empty matches are ignored: there is nothing to replace.
+ */
+export function getEmojiMatchesInText(
+	regexp: string | RegExp,
+	content: string
+): EmojiRegexMatch[] {
+	// Make sure regular expression is global
+	let expression: RegExp;
+	if (typeof regexp === 'string') {
+		expression = createEmojiRegExp(regexp);
+	} else if (regexp.global) {
+		expression = regexp;
+	} else {
+		expression = new RegExp(regexp.source, regexp.flags + 'g');
+	}
+
+	const results: EmojiRegexMatch[] = [];
+	const matches = content.match(expression);
+	if (!matches) {
+		return results;
+	}
+
+	// Sort matches by length to make sure longest matches get replaced first
+	matches.sort((a, b) => {
+		if (b.length === a.length) {
+			return a.localeCompare(b);
+		}
+		return b.length - a.length;
+	});
+
+	// Add all matches, once per unique string. Set is used instead of comparing
+	// with previous item because localeCompare() can treat different strings
+	// as equal, so identical strings are not guaranteed to be adjacent
+	const found: Set<string> = new Set();
+	for (let i = 0; i < matches.length; i++) {
+		const match = matches[i];
+		if (!match || found.has(match)) {
+			// Empty match or duplicate
+			continue;
+		}
+		found.add(match);
+
+		// Get sequence: code points of match, without variation selector
+		const sequence: number[] = [];
+		for (const codePoint of match) {
+			const num = codePoint.codePointAt(0) as number;
+			if (num !== vs16Emoji) {
+				sequence.push(num);
+			}
+		}
+
+		// Get keyword
+		const keyword = getEmojiSequenceString(
+			convertEmojiSequenceToUTF32(sequence)
+		);
+
+		results.push({
+			match,
+			sequence,
+			keyword,
+		});
+	}
+
+	return results;
+}
+
+/**
+ * Sort emojis, get prev and next text
+ *
+ * Finds every occurrence of each match in `content` and returns occurrences
+ * in order of appearance. Matches are processed in given order, so entries
+ * that are earlier in `matches` claim their ranges first; occurrences that
+ * overlap an already claimed range are ignored. Occurrences that only touch
+ * each other (adjacent emojis) are all kept.
+ *
+ * For each occurrence, `prev` is content between previous occurrence (or
+ * start of content) and it, `next` is content between it and next occurrence
+ * (or end of content).
+ *
+ * Returns empty array if nothing was found.
+ */
+export function sortEmojiMatchesInText(
+	content: string,
+	matches: EmojiRegexMatch[]
+): PrevNextMatch[] {
+	// Occurrence of match in content: `start` is inclusive, `end` is exclusive
+	interface Range {
+		match: EmojiRegexMatch;
+		start: number;
+		end: number;
+	}
+	const ranges: Range[] = [];
+
+	// Check if range does not overlap ranges that were already found.
+	// Comparison is strict on both sides: range that ends exactly where
+	// another range starts does not overlap it.
+	const check = (start: number, end: number): boolean =>
+		!ranges.some((range) => start < range.end && end > range.start);
+
+	for (let i = 0; i < matches.length; i++) {
+		const match = matches[i];
+		const search = match.match;
+		if (!search) {
+			// Empty string is found at every index, so loop below would never end
+			continue;
+		}
+
+		let startFrom = 0;
+		let start: number;
+		while ((start = content.indexOf(search, startFrom)) !== -1) {
+			const end = start + search.length;
+			startFrom = end;
+
+			// Make sure it doesn't interfere with other replacements
+			if (check(start, end)) {
+				ranges.push({
+					start,
+					end,
+					match,
+				});
+			}
+		}
+	}
+
+	if (!ranges.length) {
+		// Empty list
+		return [];
+	}
+
+	// Sort ranges by position in content
+	ranges.sort((a, b) => a.start - b.start);
+
+	// Get content before each range
+	let prevEnd = 0;
+	const list: PrevMatch[] = ranges.map((range) => {
+		const prev = content.slice(prevEnd, range.start);
+		prevEnd = range.end;
+		return {
+			match: range.match,
+			prev,
+		};
+	});
+
+	// Content after last range
+	const tail = content.slice(prevEnd);
+
+	// Convert to full data: `next` of item is `prev` of item that follows it
+	return list.map((item, index) => {
+		const nextItem = list[index + 1];
+		return {
+			...item,
+			next: nextItem ? nextItem.prev : tail,
+		};
+	});
+}
--- a/packages/utils/src/emoji/replace/replace.ts
+++ b/packages/utils/src/emoji/replace/replace.ts
@ -0,0 +1,60 @@
+import {
+	EmojiRegexMatch,
+	getEmojiMatchesInText,
+	sortEmojiMatchesInText,
+} from './find';
+
+/**
+ * Callback for replacing emoji in text
+ *
+ * Called once for every emoji occurrence.
+ *
+ * Returns text to replace emoji with, undefined to skip replacement
+ */
+export type FindAndReplaceEmojisInTextCallback = (
+	// Copy of data for matched emoji
+	match: EmojiRegexMatch,
+	// Result built so far: text before this emoji, with earlier replacements applied
+	prev: string
+) => string | undefined;
+
+/**
+ * Find and replace emojis in text
+ *
+ * Finds emojis with getEmojiMatchesInText(), orders occurrences with
+ * sortEmojiMatchesInText(), then calls `callback` for each occurrence and
+ * builds new text from content between emojis and values returned by callback.
+ *
+ * Returns null if nothing was replaced: no emojis were found or callback
+ * returned undefined for every emoji
+ */
+export function findAndReplaceEmojisInText(
+	regexp: string | RegExp,
+	content: string,
+	callback: FindAndReplaceEmojisInTextCallback
+): string | null {
+	const matches = getEmojiMatchesInText(regexp, content);
+	if (!matches.length) {
+		return null;
+	}
+
+	const sortedMatches = sortEmojiMatchesInText(content, matches);
+
+	// Build result: content before each emoji, then replacement or original emoji
+	let result = '';
+	let replaced = false;
+	for (let i = 0; i < sortedMatches.length; i++) {
+		const item = sortedMatches[i];
+		result += item.prev;
+		const replacement = callback(
+			{
+				...item.match,
+			},
+			result
+		);
+		if (replacement === void 0) {
+			// Callback skipped this emoji: keep original text
+			result += item.match.match;
+		} else {
+			// Replace content
+			result += replacement;
+			replaced = true;
+		}
+	}
+	result += sortedMatches[sortedMatches.length - 1].next;
+
+	return replaced ? result : null;
+}
--- a/packages/utils/src/emoji/variations.ts
+++ b/packages/utils/src/emoji/variations.ts
@ -7,51 +7,26 @@ import {
 import { convertEmojiSequenceToUTF32 } from './convert';
 import { keycapEmoji, vs16Emoji } from './data';
 import { getEmojiSequenceString } from './format';
+import { getQualifiedEmojiSequencesMap } from './parse-test';

 /**
- * Add optional variations to emojis
- *
- * Also converts list to UTF-32 as needed
- *
- * `testData`, returned by parseEmojiTestFile() is used to check which emojis have `FE0F` variations.
- * If missing or emoji is missing in test data, `FE0F` is added to every single code emoji.
+ * Get unqualified sequence
 */
-export function addOptionalEmojiVariations(
-	sequences: number[][],
-	testData?: number[][]
-): number[][] {
-	// Map test data
-	const testDataMap = Object.create(null) as Record<string, string>;
-	testData?.forEach((sequence) => {
-		const convertedSequence = convertEmojiSequenceToUTF32(sequence);
+export function getUnqualifiedEmojiSequence(sequence: number[]): number[] {
+	return sequence.filter((num) => num !== vs16Emoji);
+}

-		// Clean up sequence
-		const key = getEmojiSequenceString(
-			removeEmojiVariations(convertedSequence)
-		);
-		if (testDataMap[key]?.length > convertedSequence.length) {
-			// Already got version with more variations
-			return;
-		}
-
-		testDataMap[key] = getEmojiSequenceString(convertedSequence);
-	});
-
-	// Parse all sequences
-	const set: Set<string> = new Set();
-
-	sequences.forEach((sequence) => {
-		const convertedSequence = convertEmojiSequenceToUTF32(sequence);
-		const cleanSequence = removeEmojiVariations(convertedSequence);
-		const mapKey = getEmojiSequenceString(cleanSequence);
-		if (testDataMap[mapKey]) {
-			// Got item from test data
-			set.add(testDataMap[mapKey]);
-			return;
-		}
-
-		// Emoji is missing in test data: add `FE0F` as needed
-		const parts = splitEmojiSequences(convertedSequence).map((part) => {
+/**
+ * Get qualified sequence, adding optional `FE0F` wherever it might exist
+ *
+ * This might result in sequence that is not actually valid, but considering
+ * that `FE0F` is always treated as optional, full sequence used in regex will
+ * catch both qualified and unqualified emojis, so proper sequence will get
+ * caught anyway. This function just makes sure that in case if sequence does
+ * have `FE0F`, it will be caught by regex too.
+ */
+export function guessQualifiedEmojiSequence(sequence: number[]): number[] {
+	const split = splitEmojiSequences(sequence).map((part) => {
 		// Check for `FE0F`
 		if (part.indexOf(vs16Emoji) !== -1) {
 			return part;
@ -65,9 +40,56 @@ export function addOptionalEmojiVariations(
 		// Add `FE0F` to 1 character emojis
 		return part.length === 1 ? [part[0], vs16Emoji] : part;
 	});
+	return joinEmojiSequences(split);
+}

-		set.add(getEmojiSequenceString(joinEmojiSequences(parts)));
+/**
+ * Add optional variations to emojis
+ *
+ * Also converts list to UTF-32 as needed
+ *
+ * `testData`, returned by parseEmojiTestFile() is used to check which emojis have `FE0F` variations.
+ * If missing or emoji is missing in test data, `FE0F` is added to every single code emoji.
+ */
+export function addOptionalEmojiVariations(
+	sequences: number[][],
+	testData?: number[][]
+): number[][];
+export function addOptionalEmojiVariations(
+	sequences: number[][],
+	testData: number[][],
+	toString: (value: number[]) => string
+): string[];
+export function addOptionalEmojiVariations(
+	sequences: number[][],
+	testData: number[][] = [],
+	toString?: (value: number[]) => string
+): number[][] | string[] {
+	const convert = toString || getEmojiSequenceString;
+
+	// Map test data
+	const testDataMap = getQualifiedEmojiSequencesMap(testData, convert);
+
+	// Parse all sequences
+	const set: Set<string> = new Set();
+
+	sequences.forEach((sequence) => {
+		// Convert to UTF-32, remove variations
+		const convertedSequence = convertEmojiSequenceToUTF32(sequence);
+		const cleanSequence = removeEmojiVariations(convertedSequence);
+
+		// Check test data
+		const mapKey = convert(cleanSequence);
+		if (testDataMap[mapKey]) {
+			// Got item from test data
+			set.add(testDataMap[mapKey]);
+			return;
+		}
+
+		// Not in test data: guess variations
+		set.add(convert(guessQualifiedEmojiSequence(cleanSequence)));
 	});

-	return Array.from(set).map(getEmojiSequenceFromString);
+	const results = Array.from(set);
+	return toString ? results : results.map(getEmojiSequenceFromString);
 }
--- a/packages/utils/src/index.ts
+++ b/packages/utils/src/index.ts
@ -86,8 +86,6 @@ export { loadIcon } from './loader/loader';
 // Emojis
 export {
 	getEmojiSequenceFromString,
-	splitEmojiSequences,
-	joinEmojiSequences,
 	removeEmojiVariations,
 	removeEmojiTones,
 	mapEmojiSequences,
@ -101,17 +99,20 @@ export {
 	convertEmojiSequenceToUTF16,
 	convertEmojiSequenceToUTF32,
 } from './emoji/convert';
+export { getEmojiUnicodeString, getEmojiSequenceString } from './emoji/format';
 export {
-	getEmojiUnicodeString,
-	getEmojiSequenceString,
-	emojiSequenceToKeyword,
-} from './emoji/format';
-export { parseEmojiTestFile } from './emoji/parse-test';
-export { addOptionalEmojiVariations } from './emoji/variations';
+	parseEmojiTestFile,
+	getQualifiedEmojiSequencesMap,
+} from './emoji/parse-test';
+export {
+	getUnqualifiedEmojiSequence,
+	addOptionalEmojiVariations,
+} from './emoji/variations';
 export {
 	createOptimisedRegex,
 	createOptimisedRegexForEmojiSequences,
 } from './emoji/regex/create';
+export { findAndReplaceEmojisInText } from './emoji/replace/replace';

 // Misc
 export { camelize, camelToKebab, snakelize, pascalize } from './misc/strings';
--- a/packages/utils/tests/emoji-cleanup-test.ts
+++ b/packages/utils/tests/emoji-cleanup-test.ts
@ -1,5 +1,4 @@
-/* eslint-disable @typescript-eslint/no-non-null-assertion */
-import { convertEmojiSequenceToUTF32 } from '../lib';
+import { convertEmojiSequenceToUTF32 } from '../lib/emoji/convert';
 import {
 	getEmojiSequenceFromString,
 	joinEmojiSequences,
--- a/packages/utils/tests/emoji-format-test.ts
+++ b/packages/utils/tests/emoji-format-test.ts
@ -1,21 +1,20 @@
-/* eslint-disable @typescript-eslint/no-non-null-assertion */
-import {
-	getEmojiSequenceString,
-	emojiSequenceToKeyword,
-} from '../lib/emoji/format';
+import { getEmojiSequenceString } from '../lib/emoji/format';

 describe('Testing formatting emoji code points', () => {
 	it('Empty sequence', () => {
 		const sequence: number[] = [];
 		expect(getEmojiSequenceString(sequence)).toBe('');
-		expect(emojiSequenceToKeyword(sequence)).toBe('');
 	});

 	it('Keycap sequence', () => {
 		const sequence: number[] = [0x23, 0xfe0f, 0x20e3];

 		expect(getEmojiSequenceString(sequence)).toBe('23-fe0f-20e3');
-		expect(emojiSequenceToKeyword(sequence)).toBe('0023-fe0f-20e3');
+		expect(
+			getEmojiSequenceString(sequence, {
+				add0: true,
+			})
+		).toBe('0023-fe0f-20e3');
 	});

 	it('UTF-16 sequence', () => {
@ -27,9 +26,6 @@ describe('Testing formatting emoji code points', () => {
 		expect(getEmojiSequenceString(sequence)).toBe(
 			'1f441-fe0f-200d-1f5e8-fe0f'
 		);
-		expect(emojiSequenceToKeyword(sequence)).toBe(
-			'1f441-fe0f-200d-1f5e8-fe0f'
-		);
 	});

 	it('UTF-32 sequence', () => {
@ -39,8 +35,5 @@ describe('Testing formatting emoji code points', () => {
 		expect(getEmojiSequenceString(sequence)).toBe(
 			'1f441-fe0f-200d-1f5e8-fe0f'
 		);
-		expect(emojiSequenceToKeyword(sequence)).toBe(
-			'1f441-fe0f-200d-1f5e8-fe0f'
-		);
 	});
 });
--- a/packages/utils/tests/emoji-optional-variations-test.ts
+++ b/packages/utils/tests/emoji-optional-variations-test.ts
@ -1,8 +1,10 @@
-/* eslint-disable @typescript-eslint/no-non-null-assertion */
 import { readFile, writeFile, unlink } from 'node:fs/promises';
 import { getEmojiSequenceFromString } from '../lib/emoji/cleanup';
 import { getEmojiSequenceString } from '../lib/emoji/format';
-import { parseEmojiTestFile } from '../lib/emoji/parse-test';
+import {
+	getQualifiedEmojiSequencesMap,
+	parseEmojiTestFile,
+} from '../lib/emoji/parse-test';
 import { addOptionalEmojiVariations } from '../lib/emoji/variations';

 describe('Optional variations of emoji sequences', () => {
@ -88,6 +90,23 @@ describe('Optional variations of emoji sequences', () => {
 		}
 		const testData = parseEmojiTestFile(data);

+		// Make sure testData contains both fully-qualified and unqualified emojis
+		const testDataStrings = new Set(
+			testData.map((sequence) => getEmojiSequenceString(sequence))
+		);
+		expect(testDataStrings.has('1f600')).toBe(true);
+		expect(testDataStrings.has('263a')).toBe(true);
+		expect(testDataStrings.has('263a-fe0f')).toBe(true);
+
+		// Test getQualifiedEmojiSequencesMap
+		const unqualifiedTest = getQualifiedEmojiSequencesMap(
+			testData,
+			getEmojiSequenceString
+		);
+		expect(unqualifiedTest['1f600']).toBe('1f600');
+		expect(unqualifiedTest['263a']).toBe('263a-fe0f');
+
+		// Sequences to test
 		const sequences = [
 			// emoji without variation in test file
 			'1F601',
--- a/packages/utils/tests/emoji-regex-find-test.ts
+++ b/packages/utils/tests/emoji-regex-find-test.ts
@ -0,0 +1,107 @@
+import { createOptimisedRegex } from '../lib/emoji/regex/create';
+import {
+	getEmojiMatchesInText,
+	sortEmojiMatchesInText,
+} from '../lib/emoji/replace/find';
+
+describe('Finding emojis in text', () => {
+	it('Simple regex', () => {
+		const regexValue = createOptimisedRegex([
+			'1F600',
+			'1F603',
+			'1F604',
+			'263A FE0F',
+		]);
+
+		const text1 = 'E1.0 grinning face: ';
+		const emoji1 = String.fromCodePoint(0x1f600);
+		const text2 = '\nE0.6 grinning face with big eyes: ';
+		const emoji2 = String.fromCodePoint(0x1f603);
+		const text3 = 'E1.0 grinning face: ';
+		const emoji3 = emoji1;
+		const text4 = 'E0.6 smiling face: ';
+		const emoji4 = '\u263A\uFE0F';
+		const text5 = '(fully-qualified)\nE0.6 smiling face: ';
+		const emoji5 = '\u263A';
+		const text6 = '(unqualified)';
+
+		const content =
+			text1 +
+			emoji1 +
+			text2 +
+			emoji2 +
+			text3 +
+			emoji3 +
+			text4 +
+			emoji4 +
+			text5 +
+			emoji5 +
+			text6;
+		const matches = getEmojiMatchesInText(regexValue, content);
+
+		expect(matches).toEqual([
+			{
+				match: '\u263A\uFE0F',
+				sequence: [0x263a],
+				keyword: '263a',
+			},
+			{
+				// Should be returned only once
+				match: String.fromCodePoint(0x1f600),
+				sequence: [0x1f600],
+				keyword: '1f600',
+			},
+			{
+				match: String.fromCodePoint(0x1f603),
+				sequence: [0x1f603],
+				keyword: '1f603',
+			},
+			{
+				// Same as first, but without 'FE0F'
+				match: '\u263A',
+				sequence: [0x263a],
+				keyword: '263a',
+			},
+		]);
+
+		const sortedMatches = sortEmojiMatchesInText(content, matches);
+		expect(sortedMatches).toEqual([
+			// Same order as in content
+			{
+				match: emoji1,
+				sequence: [0x1f600],
+				keyword: '1f600',
+				prev: text1,
+				next: text2,
+			},
+			{
+				match: emoji2,
+				sequence: [0x1f603],
+				keyword: '1f603',
+				prev: text2,
+				next: text3,
+			},
+			{
+				match: emoji3,
+				sequence: [0x1f600],
+				keyword: '1f600',
+				prev: text3,
+				next: text4,
+			},
+			{
+				match: emoji4,
+				sequence: [0x263a],
+				keyword: '263a',
+				prev: text4,
+				next: text5,
+			},
+			{
+				match: emoji5,
+				sequence: [0x263a],
+				keyword: '263a',
+				prev: text5,
+				next: text6,
+			},
+		]);
+	});
+});
--- a/packages/utils/tests/emoji-regex-replace-test.ts
+++ b/packages/utils/tests/emoji-regex-replace-test.ts
@ -0,0 +1,82 @@
+import { createOptimisedRegex } from '../lib/emoji/regex/create';
+import { findAndReplaceEmojisInText } from '../lib/emoji/replace/replace';
+
+describe('Replacing emojis in text', () => {
+	it('Simple and complex regex matches', () => {
+		const grinningCatEmoji = String.fromCodePoint(0x1f63a);
+		const alienEmoji = String.fromCodePoint(0x1f47d);
+		const testEmoji =
+			String.fromCodePoint(0x1f441) +
+			String.fromCodePoint(0xfe0f) +
+			String.fromCodePoint(0x200d) +
+			String.fromCodePoint(0x1f5e8);
+
+		const sequence = [
+			'1f63a',
+			'1f47d',
+			// 2 emojis that can be sequences of each other
+			'1F441 FE0F',
+			'1F441 FE0F 200D 1F5E8 FE0F',
+			'1F5E8 FE0F',
+		];
+		const regex = createOptimisedRegex(sequence);
+
+		const text =
+			'Grinning Cat: ' +
+			grinningCatEmoji +
+			', aliens: ' +
+			alienEmoji +
+			alienEmoji +
+			alienEmoji +
+			', Test: ' +
+			testEmoji +
+			'end!';
+
+		// Counters
+		let grinningCatCalled = 0;
+		let alienCalled = 0;
+		let testCalled = 0;
+		const replaced = findAndReplaceEmojisInText(
+			regex,
+			text,
+			(match, prev) => {
+				switch (match.match) {
+					case grinningCatEmoji: {
+						expect(prev).toBe('Grinning Cat: ');
+						grinningCatCalled++;
+						return ':cat:';
+					}
+
+					case alienEmoji: {
+						if (alienCalled) {
+							expect(prev).toBe(
+								'Grinning Cat: :cat:, aliens: ' +
+									':alien:'.repeat(alienCalled)
+							);
+						}
+						alienCalled++;
+						return ':alien:';
+					}
+
+					case testEmoji: {
+						testCalled++;
+						return ':test:';
+					}
+
+					default: {
+						throw new Error(
+							`Unexpected match: ${JSON.stringify(match)}`
+						);
+					}
+				}
+			}
+		);
+
+		expect(grinningCatCalled).toBe(1);
+		expect(alienCalled).toBe(3);
+		expect(testCalled).toBe(1);
+		expect(replaced).toBe(
+			'Grinning Cat: :cat:, aliens: :alien::alien::alien:, Test: :test:end!'
+		);
+	});
+});