From 402075b1d0a0d74c21b6de836122d656103ef9dc Mon Sep 17 00:00:00 2001 From: aizu-m Date: Tue, 16 Jun 2026 21:50:22 +0530 Subject: [PATCH 1/2] fix out-of-bounds read in scanAttribute for valueless xml decl attr --- .../impl/common/SniffedXmlInputStream.java | 3 ++ .../misc/checkin/SniffedXmlEncodingTest.java | 53 +++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 src/test/java/misc/checkin/SniffedXmlEncodingTest.java diff --git a/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java b/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java index ca80f8370..419f2b990 100644 --- a/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java +++ b/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java @@ -269,6 +269,9 @@ private static int scanAttribute(char[] buf, int startAt, int limit, ScannedAttr return -1; } int valQuote = nextNonmatchingByte(WHITESPACE, buf, equals + 1, limit); + if (valQuote < 0) { + return -1; + } if (buf[valQuote] != '\'' && buf[valQuote] != '\"') { return -1; } diff --git a/src/test/java/misc/checkin/SniffedXmlEncodingTest.java b/src/test/java/misc/checkin/SniffedXmlEncodingTest.java new file mode 100644 index 000000000..62c9f491a --- /dev/null +++ b/src/test/java/misc/checkin/SniffedXmlEncodingTest.java @@ -0,0 +1,53 @@ +/* Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package misc.checkin; + +import org.apache.xmlbeans.impl.common.SniffedXmlInputStream; +import org.apache.xmlbeans.impl.common.SniffedXmlReader; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class SniffedXmlEncodingTest { + + // An xml declaration pseudo-attribute ending in '=' with no value, only + // whitespace up to the end of the sniffed buffer, used to index buf[-1] + // while scanning for the opening quote. The sniffer must report "no + // encoding found" instead of throwing ArrayIndexOutOfBoundsException. + @Test + void valuelessAttributeReader() throws Exception { + for (String s : new String[]{"".getBytes(StandardCharsets.US_ASCII); + assertEquals("ISO-8859-1", new SniffedXmlInputStream(new ByteArrayInputStream(b)).getXmlEncoding()); + } +} From ec27d82069629af0f7899f9627dee32aa95a2594 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Tue, 16 Jun 2026 17:54:47 +0100 Subject: [PATCH 2/2] refactor --- .../impl/common/SniffedXmlInputStream.java | 37 ++++++------------- .../impl/common/SniffedXmlReader.java | 13 ------- 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java b/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java index 419f2b990..b1705eef6 100644 --- a/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java +++ b/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlInputStream.java @@ -86,7 +86,6 @@ private int readAsMuchAsPossible(byte[] buf, int startAt, int len) throws IOExce private String sniffFourBytes() throws IOException { mark(4); - int skip = 0; try { byte[] buf = new byte[4]; if (readAsMuchAsPossible(buf, 0, 4) < 4) { @@ -124,37 +123,25 @@ private String sniffFourBytes() throws IOException { } } - // BUGBUG in JDK: Charset.forName is not threadsafe, so we'll prime it - // with the common charsets. - - private static Charset dummy1 = Charset.forName("UTF-8"); - private static Charset dummy2 = Charset.forName("UTF-16"); - private static Charset dummy3 = Charset.forName("UTF-16BE"); - private static Charset dummy4 = Charset.forName("UTF-16LE"); - private static Charset dummy5 = Charset.forName("ISO-8859-1"); - private static Charset dummy6 = Charset.forName("US-ASCII"); - private static Charset dummy7 = Charset.forName("Cp1252"); - - private String sniffForXmlDecl(String encoding) throws IOException { mark(MAX_SNIFFED_BYTES); try { byte[] bytebuf = new byte[MAX_SNIFFED_BYTES]; int bytelimit = readAsMuchAsPossible(bytebuf, 0, MAX_SNIFFED_BYTES); - // BUGBUG in JDK: Charset.forName is not threadsafe. Charset charset = Charset.forName(encoding); - Reader reader = new InputStreamReader(new ByteArrayInputStream(bytebuf, 0, bytelimit), charset); - char[] buf = new char[bytelimit]; int limit = 0; - while (limit < bytelimit) { - int count = reader.read(buf, limit, bytelimit - limit); - if (count < 0) { - break; + char[] buf = new char[bytelimit]; + try (Reader reader = new InputStreamReader( + new ByteArrayInputStream(bytebuf, 0, bytelimit), charset)) { + while (limit < bytelimit) { + int count = reader.read(buf, limit, bytelimit - limit); + if (count < 0) { + break; + } + limit += count; } - limit += count; } - return extractXmlDeclEncoding(buf, 0, limit); } finally { reset(); @@ -222,7 +209,6 @@ private static int nextNonmatchingByte(char[] lookFor, char[] buf, int startAt, } private static int nextMatchingByte(char[] lookFor, char[] buf, int startAt, int limit) { - searching: for (; startAt < limit; startAt++) { int thischar = buf[startAt]; for (int i = 0; i < lookFor.length; i++) { @@ -235,7 +221,6 @@ private static int nextMatchingByte(char[] lookFor, char[] buf, int startAt, int } private static int nextMatchingByte(char lookFor, char[] buf, int startAt, int limit) { - searching: for (; startAt < limit; startAt++) { if (buf[startAt] == lookFor) { return startAt; @@ -244,8 +229,8 @@ private static int nextMatchingByte(char lookFor, char[] buf, int startAt, int l return -1; } - private static char[] WHITESPACE = new char[]{' ', '\r', '\t', '\n'}; - private static char[] NOTNAME = new char[]{'=', ' ', '\r', '\t', '\n', '?', '>', '<', '\'', '\"'}; + private static final char[] WHITESPACE = new char[]{' ', '\r', '\t', '\n'}; + private static final char[] NOTNAME = new char[]{'=', ' ', '\r', '\t', '\n', '?', '>', '<', '\'', '\"'}; private static class ScannedAttribute { public String name; diff --git a/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlReader.java b/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlReader.java index 214ec4d48..bab734ba8 100644 --- a/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlReader.java +++ b/src/main/java/org/apache/xmlbeans/impl/common/SniffedXmlReader.java @@ -18,7 +18,6 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; -import java.nio.charset.Charset; public class SniffedXmlReader extends BufferedReader { // We don't sniff more than 192 bytes. @@ -41,18 +40,6 @@ private int readAsMuchAsPossible(char[] buf, int startAt, int len) throws IOExce return total; } - // BUGBUG in JDK: Charset.forName is not threadsafe, so we'll prime it - // with the common charsets. - - private static Charset dummy1 = Charset.forName("UTF-8"); - private static Charset dummy2 = Charset.forName("UTF-16"); - private static Charset dummy3 = Charset.forName("UTF-16BE"); - private static Charset dummy4 = Charset.forName("UTF-16LE"); - private static Charset dummy5 = Charset.forName("ISO-8859-1"); - private static Charset dummy6 = Charset.forName("US-ASCII"); - private static Charset dummy7 = Charset.forName("Cp1252"); - - private String sniffForXmlDecl() throws IOException { mark(MAX_SNIFFED_CHARS); try {