From e1db86089ad271ab50da3d66686f5e61fe16fb50 Mon Sep 17 00:00:00 2001 From: Alhuda Khan Date: Fri, 26 Jun 2026 13:38:49 +0530 Subject: [PATCH] Keep WordUtils.wrap from splitting a surrogate pair --- src/main/java/org/apache/commons/text/WordUtils.java | 10 +++++++--- .../java/org/apache/commons/text/WordUtilsTest.java | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/apache/commons/text/WordUtils.java b/src/main/java/org/apache/commons/text/WordUtils.java index e03024a403..fea98c711b 100644 --- a/src/main/java/org/apache/commons/text/WordUtils.java +++ b/src/main/java/org/apache/commons/text/WordUtils.java @@ -843,10 +843,14 @@ public static String wrap(final String str, if (matcherSize == 0) { offset--; } - // wrap really long word one line at a time - wrappedLine.append(str, offset, wrapLength + offset); + // wrap really long word one line at a time, but keep a surrogate pair whole + int wrapAt = wrapLength + offset; + if (Character.isHighSurrogate(str.charAt(wrapAt - 1)) && Character.isLowSurrogate(str.charAt(wrapAt))) { + wrapAt++; + } + wrappedLine.append(str, offset, wrapAt); wrappedLine.append(newLineStr); - offset += wrapLength; + offset = wrapAt; matcherSize = -1; } else { // do not wrap really long word, just extend beyond limit diff --git a/src/test/java/org/apache/commons/text/WordUtilsTest.java b/src/test/java/org/apache/commons/text/WordUtilsTest.java index 078ab1aebc..f267f09346 100644 --- a/src/test/java/org/apache/commons/text/WordUtilsTest.java +++ b/src/test/java/org/apache/commons/text/WordUtilsTest.java @@ -543,6 +543,14 @@ void testWrap_StringIntStringBoolean() { assertEquals(expected, WordUtils.wrap(input, 20, "\n", false)); expected = "Click here,\nhttps://commons.apac\nhe.org, to jump to\nthe commons website"; assertEquals(expected, WordUtils.wrap(input, 20, "\n", true)); + + // a hard break for a long word must not split a surrogate pair across the new line + input = "a\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00"; + expected = "a\uD83D\uDE00\uD83D\uDE00\n\uD83D\uDE00\uD83D\uDE00"; + assertEquals(expected, WordUtils.wrap(input, 4, "\n", true)); + input = "\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00"; + expected = "\uD83D\uDE00\uD83D\uDE00\n\uD83D\uDE00"; + assertEquals(expected, WordUtils.wrap(input, 3, "\n", true)); } @Test