Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ private int readAsMuchAsPossible(byte[] buf, int startAt, int len) throws IOExce

private String sniffFourBytes() throws IOException {
mark(4);
int skip = 0;
try {
byte[] buf = new byte[4];
if (readAsMuchAsPossible(buf, 0, 4) < 4) {
Expand Down Expand Up @@ -124,37 +123,25 @@ private String sniffFourBytes() throws IOException {
}
}

// BUGBUG in JDK: Charset.forName is not threadsafe, so we'll prime it
// with the common charsets.

private static Charset dummy1 = Charset.forName("UTF-8");
private static Charset dummy2 = Charset.forName("UTF-16");
private static Charset dummy3 = Charset.forName("UTF-16BE");
private static Charset dummy4 = Charset.forName("UTF-16LE");
private static Charset dummy5 = Charset.forName("ISO-8859-1");
private static Charset dummy6 = Charset.forName("US-ASCII");
private static Charset dummy7 = Charset.forName("Cp1252");


private String sniffForXmlDecl(String encoding) throws IOException {
mark(MAX_SNIFFED_BYTES);
try {
byte[] bytebuf = new byte[MAX_SNIFFED_BYTES];
int bytelimit = readAsMuchAsPossible(bytebuf, 0, MAX_SNIFFED_BYTES);

// BUGBUG in JDK: Charset.forName is not threadsafe.
Charset charset = Charset.forName(encoding);
Reader reader = new InputStreamReader(new ByteArrayInputStream(bytebuf, 0, bytelimit), charset);
char[] buf = new char[bytelimit];
int limit = 0;
while (limit < bytelimit) {
int count = reader.read(buf, limit, bytelimit - limit);
if (count < 0) {
break;
char[] buf = new char[bytelimit];
try (Reader reader = new InputStreamReader(
new ByteArrayInputStream(bytebuf, 0, bytelimit), charset)) {
while (limit < bytelimit) {
int count = reader.read(buf, limit, bytelimit - limit);
if (count < 0) {
break;
}
limit += count;
}
limit += count;
}

return extractXmlDeclEncoding(buf, 0, limit);
} finally {
reset();
Expand Down Expand Up @@ -222,7 +209,6 @@ private static int nextNonmatchingByte(char[] lookFor, char[] buf, int startAt,
}

private static int nextMatchingByte(char[] lookFor, char[] buf, int startAt, int limit) {
searching:
for (; startAt < limit; startAt++) {
int thischar = buf[startAt];
for (int i = 0; i < lookFor.length; i++) {
Expand All @@ -235,7 +221,6 @@ private static int nextMatchingByte(char[] lookFor, char[] buf, int startAt, int
}

private static int nextMatchingByte(char lookFor, char[] buf, int startAt, int limit) {
searching:
for (; startAt < limit; startAt++) {
if (buf[startAt] == lookFor) {
return startAt;
Expand All @@ -244,8 +229,8 @@ private static int nextMatchingByte(char lookFor, char[] buf, int startAt, int l
return -1;
}

private static char[] WHITESPACE = new char[]{' ', '\r', '\t', '\n'};
private static char[] NOTNAME = new char[]{'=', ' ', '\r', '\t', '\n', '?', '>', '<', '\'', '\"'};
private static final char[] WHITESPACE = new char[]{' ', '\r', '\t', '\n'};
private static final char[] NOTNAME = new char[]{'=', ' ', '\r', '\t', '\n', '?', '>', '<', '\'', '\"'};

private static class ScannedAttribute {
public String name;
Expand All @@ -269,6 +254,9 @@ private static int scanAttribute(char[] buf, int startAt, int limit, ScannedAttr
return -1;
}
int valQuote = nextNonmatchingByte(WHITESPACE, buf, equals + 1, limit);
if (valQuote < 0) {
return -1;
}
if (buf[valQuote] != '\'' && buf[valQuote] != '\"') {
return -1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;

public class SniffedXmlReader extends BufferedReader {
// We don't sniff more than 192 bytes.
Expand All @@ -41,18 +40,6 @@ private int readAsMuchAsPossible(char[] buf, int startAt, int len) throws IOExce
return total;
}

// BUGBUG in JDK: Charset.forName is not threadsafe, so we'll prime it
// with the common charsets.

private static Charset dummy1 = Charset.forName("UTF-8");
private static Charset dummy2 = Charset.forName("UTF-16");
private static Charset dummy3 = Charset.forName("UTF-16BE");
private static Charset dummy4 = Charset.forName("UTF-16LE");
private static Charset dummy5 = Charset.forName("ISO-8859-1");
private static Charset dummy6 = Charset.forName("US-ASCII");
private static Charset dummy7 = Charset.forName("Cp1252");


private String sniffForXmlDecl() throws IOException {
mark(MAX_SNIFFED_CHARS);
try {
Expand Down
53 changes: 53 additions & 0 deletions src/test/java/misc/checkin/SniffedXmlEncodingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package misc.checkin;

import org.apache.xmlbeans.impl.common.SniffedXmlInputStream;
import org.apache.xmlbeans.impl.common.SniffedXmlReader;
import org.junit.jupiter.api.Test;

import java.io.ByteArrayInputStream;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

public class SniffedXmlEncodingTest {

    /**
     * Encodes the given text as US-ASCII bytes and returns whatever encoding a
     * {@link SniffedXmlInputStream} wrapped around those bytes reports.
     */
    private static String sniffStreamEncoding(String xmlText) throws Exception {
        byte[] asciiBytes = xmlText.getBytes(StandardCharsets.US_ASCII);
        SniffedXmlInputStream sniffer = new SniffedXmlInputStream(new ByteArrayInputStream(asciiBytes));
        return sniffer.getXmlEncoding();
    }

    /**
     * Regression inputs: xml declarations whose last pseudo-attribute ends in
     * '=' with no value, followed only by whitespace (or nothing) up to the
     * end of the sniffed buffer. Scanning for the opening quote used to index
     * buf[-1] on such input; the reader must report "no encoding found"
     * (null) instead of throwing ArrayIndexOutOfBoundsException.
     */
    @Test
    void valuelessAttributeReader() throws Exception {
        String[] truncatedDeclarations = {
            "<?xml x=",
            "<?xml version=\"1.0\" encoding= ",
            "<?xml a= \t\n"
        };
        for (String declaration : truncatedDeclarations) {
            SniffedXmlReader sniffer = new SniffedXmlReader(new StringReader(declaration));
            assertNull(sniffer.getXmlEncoding());
        }
    }

    /**
     * Same valueless-attribute input, fed through the byte-stream sniffer.
     * The expected result here is "UTF-8" rather than null (presumably the
     * stream sniffer's fallback when the declaration names no encoding;
     * confirm against SniffedXmlInputStream).
     */
    @Test
    void valuelessAttributeStream() throws Exception {
        String detected = sniffStreamEncoding("<?xml version=\"1.0\" encoding= ");
        assertEquals("UTF-8", detected);
    }

    /**
     * A complete, well-formed declaration must still have its encoding
     * pseudo-attribute picked up by the stream sniffer.
     */
    @Test
    void wellFormedDeclarationStillDetected() throws Exception {
        String detected = sniffStreamEncoding("<?xml version='1.0' encoding='ISO-8859-1'?>");
        assertEquals("ISO-8859-1", detected);
    }
}
Loading