From 1bcedd7bd97ed43c643858bab7c81a82a92ced40 Mon Sep 17 00:00:00 2001
From: Alhuda Khan
Date: Wed, 24 Jun 2026 21:12:48 +0530
Subject: [PATCH] keep StrBuilder.reverse from splitting surrogate pairs

---
 .../apache/commons/lang3/text/StrBuilder.java | 21 ++++++++++++++++---
 .../commons/lang3/text/StrBuilderTest.java    | 15 +++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/main/java/org/apache/commons/lang3/text/StrBuilder.java b/src/main/java/org/apache/commons/lang3/text/StrBuilder.java
index 45dd1e0a06f..13bdb64f02e 100644
--- a/src/main/java/org/apache/commons/lang3/text/StrBuilder.java
+++ b/src/main/java/org/apache/commons/lang3/text/StrBuilder.java
@@ -2758,10 +2758,25 @@ public StrBuilder reverse() {
 
         final int half = size / 2;
         final char[] buf = buffer;
+        boolean hasSurrogates = false;
         for (int leftIdx = 0, rightIdx = size - 1; leftIdx < half; leftIdx++, rightIdx--) {
-            final char swap = buf[leftIdx];
-            buf[leftIdx] = buf[rightIdx];
-            buf[rightIdx] = swap;
+            final char left = buf[leftIdx];
+            final char right = buf[rightIdx];
+            buf[leftIdx] = right;
+            buf[rightIdx] = left;
+            hasSurrogates |= Character.isSurrogate(left) || Character.isSurrogate(right);
+        }
+        if (hasSurrogates) {
+            // The char-by-char swap above turns every high-low surrogate pair into low-high; swap each such
+            // adjacent pair back so a supplementary code point stays valid, matching StringBuilder#reverse().
+            for (int i = 0; i < size - 1; i++) {
+                if (Character.isLowSurrogate(buf[i]) && Character.isHighSurrogate(buf[i + 1])) {
+                    final char low = buf[i];
+                    buf[i] = buf[i + 1];
+                    buf[i + 1] = low;
+                    i++; // step past the pair just restored so its low surrogate is not paired again
+                }
+            }
         }
         return this;
     }
diff --git a/src/test/java/org/apache/commons/lang3/text/StrBuilderTest.java b/src/test/java/org/apache/commons/lang3/text/StrBuilderTest.java
index 5d6b739c43a..3a711c6b47e 100644
--- a/src/test/java/org/apache/commons/lang3/text/StrBuilderTest.java
+++ b/src/test/java/org/apache/commons/lang3/text/StrBuilderTest.java
@@ -1633,6 +1633,21 @@ void testReverse() {
         assertEquals("true", sb.reverse().toString());
     }
 
+    @Test
+    void testReverseSurrogatePairs() {
+        // U+1F600 GRINNING FACE is a supplementary code point encoded as a surrogate pair; reversing must keep
+        // the pair intact (high before low) like StringBuilder. Unicode escapes avoid source-encoding issues.
+        final String emoji = "\uD83D\uDE00";
+        assertEquals(new StringBuilder("a" + emoji + "b").reverse().toString(), new StrBuilder("a" + emoji + "b").reverse().toString());
+        assertEquals("b" + emoji + "a", new StrBuilder("a" + emoji + "b").reverse().toString());
+
+        final String emoji2 = "\uD83D\uDE01";
+        assertEquals(emoji2 + emoji, new StrBuilder(emoji + emoji2).reverse().toString());
+
+        // An unpaired high surrogate has no partner and is left where it lands.
+        assertEquals("b\uD800a", new StrBuilder("a\uD800b").reverse().toString());
+    }
+
     @Test
     void testRightString() {
         final StrBuilder sb = new StrBuilder("left right");