From b59c927b24f225feb3fb4d83d92ee96d856c48a9 Mon Sep 17 00:00:00 2001 From: hazre Date: Fri, 17 Apr 2026 01:06:22 +0200 Subject: [PATCH] refactor: emoji detection in text --- package.json | 1 - pnpm-lock.yaml | 8 -- .../components/message/MsgTypeRenderers.tsx | 15 ++-- src/app/components/power/PowerIcon.tsx | 27 ++++-- .../plugins/react-custom-html-parser.test.tsx | 14 +++ src/app/plugins/react-custom-html-parser.tsx | 69 +++++++++++---- src/app/utils/emojiDetection.test.ts | 44 ++++++++++ src/app/utils/emojiDetection.ts | 88 +++++++++++++++++++ src/app/utils/regex.ts | 16 ---- 9 files changed, 225 insertions(+), 57 deletions(-) create mode 100644 src/app/utils/emojiDetection.test.ts create mode 100644 src/app/utils/emojiDetection.ts diff --git a/package.json b/package.json index 6734acee4..aaf08ef7f 100644 --- a/package.json +++ b/package.json @@ -51,7 +51,6 @@ "dayjs": "^1.11.21", "domhandler": "^5.0.3", "dompurify": "^3.4.10", - "emoji-regex": "^10.6.0", "emojibase": "^17.0.0", "emojibase-data": "^17.0.0", "eventemitter3": "^5.0.4", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index bd8371dac..34784ff03 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -86,9 +86,6 @@ importers: dompurify: specifier: ^3.4.10 version: 3.4.10 - emoji-regex: - specifier: ^10.6.0 - version: 10.6.0 emojibase: specifier: ^17.0.0 version: 17.0.0 @@ -3374,9 +3371,6 @@ packages: electron-to-chromium@1.5.374: resolution: {integrity: sha512-HCF5i7izveksHSGqa7mhDh6tr3Uz9Dar2RAjwuh69bw3QGPVObjQIgLwQWeO/Rxp9/r0KdboKy9RbpQDl97fjg==} - emoji-regex@10.6.0: - resolution: {integrity: sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==} - emojibase-data@17.0.0: resolution: {integrity: sha512-Yvgb5AWoHViHV/gq1qr5ZAarcBip+B27/ZLRsUJkbgAEaLlZ/fof9g882LTpmEpyhBNEC0m2SEmItljHsTygjA==} peerDependencies: @@ -7943,8 +7937,6 @@ snapshots: electron-to-chromium@1.5.374: {} - emoji-regex@10.6.0: {} - emojibase-data@17.0.0(emojibase@17.0.0): dependencies: emojibase: 17.0.0 diff --git a/src/app/components/message/MsgTypeRenderers.tsx b/src/app/components/message/MsgTypeRenderers.tsx index 4aeda3eb4..7c7d81f68 100644 --- a/src/app/components/message/MsgTypeRenderers.tsx +++ b/src/app/components/message/MsgTypeRenderers.tsx @@ -1,9 +1,8 @@ -import type { CSSProperties, ReactNode } from 'react'; -import { useMemo } from 'react'; -import { Box, Chip, Text, toRem } from 'folds'; +import { type CSSProperties, type ReactNode, useMemo } from 'react'; import { ArrowSquareOut, sizedIcon, Link } from '$components/icons/phosphor'; -import type { IContent, IPreviewUrlResponse, MatrixClient } from '$types/matrix-sdk'; -import { JUMBO_EMOJI_REG } from '$utils/regex'; +import { Box, Chip, Text, toRem } from 'folds'; +import { type IContent, type IPreviewUrlResponse, type MatrixClient } from '$types/matrix-sdk'; +import { isJumboEmojiText } from '$utils/emojiDetection'; import { trimReplyFromBody } from '$utils/room'; import type { IAudioContent, @@ -227,7 +226,7 @@ export function MText({ ) ) return true; - if (!JUMBO_EMOJI_REG.test(trimmedBody)) return false; + if (!isJumboEmojiText(trimmedBody)) return false; if (trimmedBody.includes(':')) { const hasImage = customBody && /]*>/i.test(customBody); @@ -338,7 +337,7 @@ export function MEmote({ return ; } const trimmedBody = trimReplyFromBody(body); - const isJumbo = JUMBO_EMOJI_REG.test(trimmedBody); + const isJumbo = isJumboEmojiText(trimmedBody); const { urls, bundleContent } = getUrlsFromContent(content, renderUrlsPreview); @@ -393,7 +392,7 @@ export function MNotice({ return ; } const trimmedBody = trimReplyFromBody(body); - const isJumbo = JUMBO_EMOJI_REG.test(trimmedBody); + const isJumbo = isJumboEmojiText(trimmedBody); const { urls, bundleContent } = getUrlsFromContent(content, renderUrlsPreview); diff --git a/src/app/components/power/PowerIcon.tsx b/src/app/components/power/PowerIcon.tsx index f86331035..63f9d6d8e 100644 --- a/src/app/components/power/PowerIcon.tsx +++ b/src/app/components/power/PowerIcon.tsx @@ -1,14 +1,29 @@ -import { JUMBO_EMOJI_REG } from '$utils/regex'; +import { isJumboEmojiText } from '$utils/emojiDetection'; import * as css from './style.css'; type PowerIconProps = css.PowerIconVariants & { iconSrc: string; name?: string; }; + +const ALLOWED_ICON_PROTOCOLS = new Set(['http:', 'https:']); + +function getSafeIconUrl(iconSrc: string): string | undefined { + try { + const parsed = new URL(iconSrc); + return ALLOWED_ICON_PROTOCOLS.has(parsed.protocol) ? parsed.href : undefined; + } catch { + return undefined; + } +} + export function PowerIcon({ size, iconSrc, name }: PowerIconProps) { - return JUMBO_EMOJI_REG.test(iconSrc) ? ( - {iconSrc} - ) : ( - {name} - ); + if (isJumboEmojiText(iconSrc, 1)) { + return {iconSrc}; + } + + const safeIconUrl = getSafeIconUrl(iconSrc); + if (!safeIconUrl) return null; + + return {name}; } diff --git a/src/app/plugins/react-custom-html-parser.test.tsx b/src/app/plugins/react-custom-html-parser.test.tsx index 17f929650..f83029540 100644 --- a/src/app/plugins/react-custom-html-parser.test.tsx +++ b/src/app/plugins/react-custom-html-parser.test.tsx @@ -10,6 +10,7 @@ import { getReactCustomHtmlParser, makeMentionCustomProps, renderMatrixMention, + scaleSystemEmoji, } from './react-custom-html-parser'; import { registerMatrixUriProtocol } from './matrix-uri'; import { markdownToHtml } from './markdown/markdownToHtml'; @@ -167,6 +168,19 @@ describe('react custom html parser', () => { expect(img).toHaveAttribute('height', '64'); }); + it.each(['🫩', '🫪', '🫯', '🇩🇪', '🙂‍↔️'])( + 'wraps modern emoji text %s in emoticon markup', + (emoji) => { + const result = scaleSystemEmoji(emoji); + expect(result).toHaveLength(1); + expect(typeof result[0]).not.toBe('string'); + } + ); + + it('does not wrap emojis inside urls', () => { + expect(scaleSystemEmoji('https://example.com/🫩')).toEqual(['https://example.com/🫩']); + }); + it('renders same-origin raw settings links as mention-style chips through the factory link render path', () => { const renderLink = factoryRenderLinkifyWithMention( settingsLinkBaseUrl, diff --git a/src/app/plugins/react-custom-html-parser.tsx b/src/app/plugins/react-custom-html-parser.tsx index c0d237f68..f0fb2116b 100644 --- a/src/app/plugins/react-custom-html-parser.tsx +++ b/src/app/plugins/react-custom-html-parser.tsx @@ -17,8 +17,6 @@ import { import type { IntermediateRepresentation, OptFn, Opts as LinkifyOpts } from 'linkifyjs'; import Linkify from 'linkify-react'; import type { ChildNode } from 'domhandler'; -import emojiRegex from 'emoji-regex'; - import * as css from '$styles/CustomHtml.css'; import { getCanonicalAliasRoomId, @@ -27,8 +25,9 @@ import { mxcUrlToHttp, } from '$utils/matrix'; import { getMemberDisplayName } from '$utils/room'; -import type { Nicknames } from '$state/nicknames'; -import { sanitizeForRegex, URL_NEG_LB } from '$utils/regex'; +import { type Nicknames } from '$state/nicknames'; +import { sanitizeForRegex, URL_REG } from '$utils/regex'; +import { splitEmojiText } from '$utils/emojiDetection'; import { findAndReplace } from '$utils/findAndReplace'; import { onEnterOrSpace } from '$utils/keyboard'; import { copyToClipboard } from '$utils/dom'; @@ -47,12 +46,9 @@ import { import { isRedundantMatrixUriAnchorText, parseMatrixUri, testMatrixUri } from './matrix-uri'; import { getHexcodeForEmoji, getShortcodeFor } from './emoji'; -const EMOJI_REG_G = new RegExp(`${URL_NEG_LB}(${emojiRegex().source})`, 'g'); - const shouldLinkifyDomText = (domNode: DOMText): boolean => !(domNode.parent && 'name' in domNode.parent && domNode.parent.name === 'code') && !(domNode.parent && 'name' in domNode.parent && domNode.parent.name === 'a'); - export const LINKIFY_OPTS: LinkifyOpts = { attributes: { target: '_blank', @@ -349,19 +345,56 @@ export const factoryRenderLinkifyWithMention = ( return renderLink; }; -export const scaleSystemEmoji = (text: string): (string | JSX.Element)[] => - findAndReplace( - text, - EMOJI_REG_G, - (match, pushIndex) => ( - - - {match[0]} +const scaleEmojiChunk = (text: string, output: (string | JSX.Element)[]) => { + splitEmojiText(text).forEach((part) => { + if (part.type === 'text') { + output.push(part.value); + return; + } + + output.push( + + + {part.value} - ), - (txt) => txt - ); + ); + }); +}; + +export const scaleSystemEmoji = (text: string): (string | JSX.Element)[] => { + const parts: (string | JSX.Element)[] = []; + const urlReg = new RegExp(URL_REG); + let lastIndex = 0; + + [...text.matchAll(urlReg)].forEach((match) => { + const start = match.index ?? 0; + scaleEmojiChunk(text.slice(lastIndex, start), parts); + parts.push(match[0]); + lastIndex = start + match[0].length; + }); + + scaleEmojiChunk(text.slice(lastIndex), parts); + + const normalized: (string | JSX.Element)[] = []; + parts.forEach((part) => { + if (typeof part !== 'string') { + normalized.push(part); + return; + } + + if (part === '') return; + const previous = normalized.at(-1); + if (typeof previous === 'string') { + normalized[normalized.length - 1] = `${previous}${part}`; + return; + } + + normalized.push(part); + }); + + return normalized.length > 0 ? normalized : ['']; +}; export const makeHighlightRegex = (highlights: string[]): RegExp | undefined => { const pattern = highlights.map(sanitizeForRegex).join('|'); diff --git a/src/app/utils/emojiDetection.test.ts b/src/app/utils/emojiDetection.test.ts new file mode 100644 index 000000000..d3df123ef --- /dev/null +++ b/src/app/utils/emojiDetection.test.ts @@ -0,0 +1,44 @@ +import { describe, it, expect } from 'vitest'; +import { isEmojiGrapheme, isJumboEmojiText, splitEmojiText } from './emojiDetection'; + +describe('isEmojiGrapheme', () => { + it.each(['🫩', '🫪', '🫯', '🇩🇪', '🙂‍↔️', '™️'])('matches emoji grapheme %s', (emoji) => { + expect(isEmojiGrapheme(emoji)).toBe(true); + }); + + it.each(['a', '12', 'http'])('does not match plain text segment %s', (value) => { + expect(isEmojiGrapheme(value)).toBe(false); + }); +}); + +describe('splitEmojiText', () => { + it('preserves newer emoji as standalone parts', () => { + expect(splitEmojiText('a🫪b')).toEqual([ + { type: 'text', value: 'a' }, + { type: 'emoji', value: '🫪' }, + { type: 'text', value: 'b' }, + ]); + }); + + it('keeps emoji sequences whole', () => { + expect(splitEmojiText('🙂‍↔️')).toEqual([ + { type: 'text', value: '' }, + { type: 'emoji', value: '🙂‍↔️' }, + { type: 'text', value: '' }, + ]); + }); +}); + +describe('isJumboEmojiText', () => { + it.each(['🫩', '🫪', '🫯', '🇩🇪', '🙂‍↔️'])('matches modern emoji sequence %s', (emoji) => { + expect(isJumboEmojiText(emoji)).toBe(true); + }); + + it.each(['123', 'hello', 'abc 123'])('does not match non-emoji text %s', (value) => { + expect(isJumboEmojiText(value)).toBe(false); + }); + + it('still matches shortcode-only content', () => { + expect(isJumboEmojiText(':blobcat:')).toBe(true); + }); +}); diff --git a/src/app/utils/emojiDetection.ts b/src/app/utils/emojiDetection.ts new file mode 100644 index 000000000..ba19616a7 --- /dev/null +++ b/src/app/utils/emojiDetection.ts @@ -0,0 +1,88 @@ +/** + * Emoji detection works on grapheme clusters, not raw code points. + * Intl.Segmenter keeps ZWJ sequences, flags, and keycaps intact as single user-visible units. + * Each grapheme is treated as emoji-like if it is a keycap sequence, an emoji forced by Variation Selector-16, or contains Emoji_Presentation, Extended_Pictographic, or Regional_Indicator. + * This is intentionally broader than `\p{RGI_Emoji}` because browsers can lag on that property for newer emojis like `🫪`. + * The goal here is UI rendering, so broad emoji-like detection is more useful than strict Unicode interchange validation. + */ + +const graphemeSegmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' }); + +const SHORTCODE_TOKEN_REG = /^:[^:\s]+:/u; +const EMOJI_GRAPHEME_REG = + /[#*0-9]\uFE0F?\u20E3|\p{Emoji}\uFE0F|[\p{Emoji_Presentation}\p{Extended_Pictographic}\p{Regional_Indicator}]/u; + +export type EmojiTextPart = + | { + type: 'text'; + value: string; + } + | { + type: 'emoji'; + value: string; + }; + +export const getFirstGrapheme = (text: string): string => { + const first = graphemeSegmenter.segment(text)[Symbol.iterator]().next(); + return first.done ? '' : first.value.segment; +}; + +export const isEmojiGrapheme = (segment: string): boolean => { + if (!segment) return false; + return EMOJI_GRAPHEME_REG.test(segment); +}; + +export const splitEmojiText = (text: string): EmojiTextPart[] => { + const parts: EmojiTextPart[] = []; + let buffer = ''; + let foundEmoji = false; + + [...graphemeSegmenter.segment(text)].forEach(({ segment }) => { + if (isEmojiGrapheme(segment)) { + foundEmoji = true; + parts.push({ type: 'text', value: buffer }); + buffer = ''; + parts.push({ type: 'emoji', value: segment }); + } else { + buffer += segment; + } + }); + + if (!foundEmoji) { + return [{ type: 'text', value: buffer }]; + } + + parts.push({ type: 'text', value: buffer }); + return parts; +}; + +export const isJumboEmojiText = (text: string, maxTokens = 10): boolean => { + if (!text) return false; + + let tokenCount = 0; + let index = 0; + + while (index < text.length) { + const remainder = text.slice(index); + const whitespaceMatch = /^\s+/u.exec(remainder); + if (whitespaceMatch) { + index += whitespaceMatch[0].length; + } else { + const shortcodeMatch = SHORTCODE_TOKEN_REG.exec(remainder); + if (shortcodeMatch) { + tokenCount += 1; + if (tokenCount > maxTokens) return false; + index += shortcodeMatch[0].length; + } else { + const grapheme = getFirstGrapheme(remainder); + if (!isEmojiGrapheme(grapheme)) return false; + + tokenCount += 1; + if (tokenCount > maxTokens) return false; + index += grapheme.length; + } + } + } + + return tokenCount > 0; +}; diff --git a/src/app/utils/regex.ts b/src/app/utils/regex.ts index 1e7b61aba..4f5bbc001 100644 --- a/src/app/utils/regex.ts +++ b/src/app/utils/regex.ts @@ -1,5 +1,3 @@ -import emojiRegex from 'emoji-regex'; - /** * https://www.npmjs.com/package/escape-string-regexp */ @@ -12,17 +10,3 @@ export const URL_REG = new RegExp(HTTP_URL_PATTERN, 'g'); export const EMAIL_REGEX = /^(([^<>()[\]\\.,;:\s@\\"]+(\.[^<>()[\]\\.,;:\s@\\"]+)*)|(\\".+\\"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/; - -export const URL_NEG_LB = '(?