From 682c98117d33295a1706ebef0c4cd68c7d3c31bc Mon Sep 17 00:00:00 2001 From: aizu-m Date: Tue, 16 Jun 2026 11:23:07 +0530 Subject: [PATCH 1/2] escape tab, newline and carriage return in attribute values add XmlOption to opt out of attribute whitespace escaping Default stays as escaping #x9/#xA/#xD in attribute values, with setSaveNoAttributeWhitespaceEscape() to restore the pre-existing behaviour of writing them literally. Honoured by both the text and optimize-for-speed savers. --- .../java/org/apache/xmlbeans/XmlOptions.java | 42 ++++++++++++++++++ .../apache/xmlbeans/impl/store/Cursor.java | 2 +- .../org/apache/xmlbeans/impl/store/Saver.java | 27 ++++++++++-- .../java/misc/checkin/XmlOptionsTest.java | 10 +++++ .../java/misc/detailed/CharEscapeTest.java | 43 +++++++++++++++++++ 5 files changed, 119 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/apache/xmlbeans/XmlOptions.java b/src/main/java/org/apache/xmlbeans/XmlOptions.java index 6cec532fa..a79102b48 100644 --- a/src/main/java/org/apache/xmlbeans/XmlOptions.java +++ b/src/main/java/org/apache/xmlbeans/XmlOptions.java @@ -107,6 +107,7 @@ public enum XmlOptionsKeys { SAVE_CDATA_LENGTH_THRESHOLD, SAVE_CDATA_ENTITY_COUNT_THRESHOLD, SAVE_SAX_NO_NSDECLS_IN_ATTRIBUTES, + SAVE_NO_ATTRIBUTE_WHITESPACE_ESCAPE, LOAD_REPLACE_DOCUMENT_ELEMENT, LOAD_STRIP_WHITESPACE, LOAD_STRIP_COMMENTS, @@ -558,6 +559,47 @@ public boolean isSaveNoXmlDecl() { return hasOption(XmlOptionsKeys.SAVE_NO_XML_DECL); } + /** + * By default the saver now escapes a tab ({@code #x9}), newline ({@code #xA}) + * and carriage return ({@code #xD}) inside an attribute value as a character + * reference ({@code &#x9;}, {@code &#xA;}, {@code &#xD;}). Without that, the + * literal characters are normalised to spaces when the document is read back + * in, so a save followed by a load silently rewrites the value. Set this + * option to restore the long-standing behaviour of writing those characters + * literally. 
+ * + * @return this + * @since 5.4.0 + */ + public XmlOptions setSaveNoAttributeWhitespaceEscape() { + return setSaveNoAttributeWhitespaceEscape(true); + } + + /** + * Sets whether tab, newline and carriage return are written literally in + * attribute values instead of being escaped as character references. See + * {@link #setSaveNoAttributeWhitespaceEscape()}. + * + * @param b {@code true} to write those characters literally (pre-5.4.0 behaviour) + * @return this + * @since 5.4.0 + */ + public XmlOptions setSaveNoAttributeWhitespaceEscape(boolean b) { + return set(XmlOptionsKeys.SAVE_NO_ATTRIBUTE_WHITESPACE_ESCAPE, b); + } + + /** + * Returns whether tab, newline and carriage return are written literally in + * attribute values instead of being escaped. See + * {@link #setSaveNoAttributeWhitespaceEscape()}. + * + * @return {@code true} if those characters are written literally + * @since 5.4.0 + */ + public boolean isSaveNoAttributeWhitespaceEscape() { + return hasOption(XmlOptionsKeys.SAVE_NO_ATTRIBUTE_WHITESPACE_ESCAPE); + } + /** * This option controls when saving will use CDATA blocks. 
diff --git a/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java b/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java index e397626a0..3063d296d 100755 --- a/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java +++ b/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java @@ -585,7 +585,7 @@ public void _save(Writer w, XmlOptions options) throws IOException { } if (options != null && options.isSaveOptimizeForSpeed()) { - Saver.OptimizedForSpeedSaver.save(_cur, w); //ignore all other options + Saver.OptimizedForSpeedSaver.save(_cur, w, options); //ignore all other options bar attribute whitespace escaping return; } diff --git a/src/main/java/org/apache/xmlbeans/impl/store/Saver.java b/src/main/java/org/apache/xmlbeans/impl/store/Saver.java index 30307954e..daf34f334 100755 --- a/src/main/java/org/apache/xmlbeans/impl/store/Saver.java +++ b/src/main/java/org/apache/xmlbeans/impl/store/Saver.java @@ -52,6 +52,7 @@ abstract class Saver { private final boolean _useDefaultNamespace; private Map _preComputedNamespaces; private final boolean _saveNamespacesFirst; + private final boolean _escapeAttrWhitespace; private final ArrayList _attrNames = new ArrayList<>(); private final ArrayList _attrValues = new ArrayList<>(); @@ -131,6 +132,8 @@ protected void syntheticNamespace(String prefix, String uri, boolean considerDef _saveNamespacesFirst = options.isSaveNamespacesFirst(); + _escapeAttrWhitespace = !options.isSaveNoAttributeWhitespaceEscape(); + _suggestedPrefixes = options.getSaveSuggestedPrefixes(); @@ -273,6 +276,10 @@ protected boolean saveNamespacesFirst() { return _saveNamespacesFirst; } + protected boolean escapeAttrWhitespace() { + return _escapeAttrWhitespace; + } + protected final boolean process() { assert _locale.entered(); @@ -1370,6 +1377,12 @@ private void entitizeAttrValue(boolean replaceEscapedChar) { i = replace(i, "&"); } else if (ch == '"') { i = replace(i, """); + } else if (ch == '\t' && escapeAttrWhitespace()) { + i = replace(i, " 
"); + } else if (ch == '\n' && escapeAttrWhitespace()) { + i = replace(i, " "); + } else if (ch == '\r' && escapeAttrWhitespace()) { + i = replace(i, " "); } else if (isEscapedChar(ch)) { if (replaceEscapedChar) { i = replace(i, _replaceChar.getEscapedString(ch)); @@ -1795,15 +1808,15 @@ static private class SaverIOException } - OptimizedForSpeedSaver(Cur cur, Writer writer) { - super(cur, XmlOptions.maskNull(null)); + OptimizedForSpeedSaver(Cur cur, Writer writer, XmlOptions options) { + super(cur, XmlOptions.maskNull(options)); _w = writer; } - static void save(Cur cur, Writer writer) + static void save(Cur cur, Writer writer, XmlOptions options) throws IOException { try { - Saver saver = new OptimizedForSpeedSaver(cur, writer); + Saver saver = new OptimizedForSpeedSaver(cur, writer, options); //noinspection StatementWithEmptyBody while (saver.process()) { } @@ -2027,6 +2040,12 @@ private void emitAttrValue(CharSequence attVal) { emit("&"); } else if (ch == '"') { emit("""); + } else if (ch == '\t' && escapeAttrWhitespace()) { + emit(" "); + } else if (ch == '\n' && escapeAttrWhitespace()) { + emit(" "); + } else if (ch == '\r' && escapeAttrWhitespace()) { + emit(" "); } else { emit(ch); } diff --git a/src/test/java/misc/checkin/XmlOptionsTest.java b/src/test/java/misc/checkin/XmlOptionsTest.java index e7ee411df..d791987ee 100644 --- a/src/test/java/misc/checkin/XmlOptionsTest.java +++ b/src/test/java/misc/checkin/XmlOptionsTest.java @@ -40,4 +40,14 @@ void testLoadStrictFloatingPointFlag() { xmlOptions.setLoadStrictFloatingPoint(false); assertFalse(xmlOptions.isLoadStrictFloatingPoint()); } + + @Test + void testSaveNoAttributeWhitespaceEscapeFlag() { + XmlOptions xmlOptions = new XmlOptions(); + assertFalse(xmlOptions.isSaveNoAttributeWhitespaceEscape()); + xmlOptions.setSaveNoAttributeWhitespaceEscape(); + assertTrue(xmlOptions.isSaveNoAttributeWhitespaceEscape()); + xmlOptions.setSaveNoAttributeWhitespaceEscape(false); + 
assertFalse(xmlOptions.isSaveNoAttributeWhitespaceEscape()); + } } diff --git a/src/test/java/misc/detailed/CharEscapeTest.java b/src/test/java/misc/detailed/CharEscapeTest.java index 486511be6..21b8cb4e9 100644 --- a/src/test/java/misc/detailed/CharEscapeTest.java +++ b/src/test/java/misc/detailed/CharEscapeTest.java @@ -17,11 +17,13 @@ import jira.xmlbeans177.TestListDocument; import jira.xmlbeans177A.TestListADocument; import org.apache.xmlbeans.XmlException; +import org.apache.xmlbeans.XmlObject; import org.apache.xmlbeans.XmlOptionCharEscapeMap; import org.apache.xmlbeans.XmlOptions; import org.junit.jupiter.api.Test; import java.io.File; +import java.io.StringWriter; import java.util.HashMap; import java.util.Map; @@ -236,4 +238,45 @@ void testEscapeAttribute() throws Exception { end2; assertEquals(exp3, doc.xmlText(opts).replaceFirst("(?s)", "")); } + + @Test + void testEscapeAttributeWhitespace() throws Exception { + // tab, newline and carriage return survive attribute-value normalisation + // only when written as character references; a literal char would be + // turned into a space when the document is read back in + XmlObject doc = XmlObject.Factory.parse(""); + + String expected = ""; + assertEquals(expected, doc.xmlText()); + + StringWriter sw = new StringWriter(); + doc.save(sw); + assertEquals(expected, sw.toString()); + + // round-trips with the value intact + org.apache.xmlbeans.XmlCursor c = XmlObject.Factory.parse(doc.xmlText()).newCursor(); + c.toFirstChild(); + assertEquals("x\ty\nz\rw", c.getAttributeText(new javax.xml.namespace.QName("a"))); + c.dispose(); + } + + @Test + void testEscapeAttributeWhitespaceOptOut() throws Exception { + // setSaveNoAttributeWhitespaceEscape restores the pre-5.4.0 behaviour of + // writing tab, newline and carriage return literally + XmlObject doc = XmlObject.Factory.parse(""); + + XmlOptions opts = new XmlOptions().setSaveNoAttributeWhitespaceEscape(); + String expected = ""; + assertEquals(expected, 
doc.xmlText(opts)); + + StringWriter sw = new StringWriter(); + doc.save(sw, opts); + assertEquals(expected, sw.toString()); + + // the optimize-for-speed writer honours the option too + StringWriter sw2 = new StringWriter(); + doc.save(sw2, new XmlOptions(opts).setSaveOptimizeForSpeed(true)); + assertEquals(expected, sw2.toString()); + } } From 1cef5da3eccaaee4981a6a8dbf32fb0ef9f24b8a Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Tue, 16 Jun 2026 11:39:19 +0100 Subject: [PATCH 2/2] refactor --- src/main/java/org/apache/xmlbeans/impl/store/Cursor.java | 6 +++--- src/main/java/org/apache/xmlbeans/impl/store/Saver.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java b/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java index 3063d296d..b29ef05c1 100755 --- a/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java +++ b/src/main/java/org/apache/xmlbeans/impl/store/Cursor.java @@ -179,7 +179,7 @@ private void insertNode(Cur that, String text) { assert isValid(that); assert isValid(); - if (text != null && text.length() > 0) { + if (text != null && !text.isEmpty()) { that.next(); that.insertString(text); that.toParent(); @@ -247,11 +247,11 @@ public void _setName(QName name) { case PROCINST: { validatePrefix(name.getLocalPart()); - if (name.getNamespaceURI().length() > 0) { + if (!name.getNamespaceURI().isEmpty()) { throw new IllegalArgumentException("Procinst name must have no URI"); } - if (name.getPrefix().length() > 0) { + if (!name.getPrefix().isEmpty()) { throw new IllegalArgumentException("Procinst name must have no prefix"); } diff --git a/src/main/java/org/apache/xmlbeans/impl/store/Saver.java b/src/main/java/org/apache/xmlbeans/impl/store/Saver.java index daf34f334..885501970 100755 --- a/src/main/java/org/apache/xmlbeans/impl/store/Saver.java +++ b/src/main/java/org/apache/xmlbeans/impl/store/Saver.java @@ -1589,7 +1589,7 @@ private int resize(int cch, int i) { (_out == 
_in && _free == 0) // buffer full : "_buf.length:" + _cbuf.length + " _in:" + _in + " _out:" + _out + " _free:" + _free; - long newLen = _cbuf == null ? _initialBufSize : _cbuf.length * 2; + long newLen = _cbuf == null ? _initialBufSize : _cbuf.length * 2L; int used = getAvailable(); while (newLen - used < cch) { @@ -2500,7 +2500,7 @@ public void write(byte[] buf, int off, int cbyte) { void resize(int cbyte) { assert cbyte > _free : cbyte + " !> " + _free; - long newLen = _buf == null ? _initialBufSize : _buf.length * 2; + long newLen = _buf == null ? _initialBufSize : _buf.length * 2L; int used = getAvailable(); while (newLen - used < cbyte) {