diff --git a/.gitignore b/.gitignore
index 9644027b7..98ca6bf64 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 build/
+.gradle/
 lib/**/*.jar
 /bin
 gradle/wrapper/gradle-wrapper.jar
diff --git a/src/main/java/org/apache/xmlbeans/XmlOptions.java b/src/main/java/org/apache/xmlbeans/XmlOptions.java
index 3d055b867..6cec532fa 100644
--- a/src/main/java/org/apache/xmlbeans/XmlOptions.java
+++ b/src/main/java/org/apache/xmlbeans/XmlOptions.java
@@ -156,7 +156,8 @@ public enum XmlOptionsKeys {
         LOAD_USE_LOCALE_CHAR_UTIL,
         XPATH_USE_SAXON,
         XPATH_USE_XMLBEANS,
-        ATTRIBUTE_VALIDATION_COMPAT_MODE
+        ATTRIBUTE_VALIDATION_COMPAT_MODE,
+        LOAD_STRICT_FLOATING_POINT
     }
 
 
@@ -1123,6 +1124,47 @@ public boolean isValidateOnSet() {
         return hasOption(XmlOptionsKeys.VALIDATE_ON_SET);
     }
 
+    /**
+     * If this option is set, xsd:float and xsd:double values are held to the XSD
+     * lexical space when parsing. {@link Float#parseFloat}/{@link Double#parseDouble}
+     * also accept lexical forms that XSD does not allow: hexadecimal floats
+     * ({@code 0x1p4}), the Java {@code Infinity} token, a signed {@code NaN}, and a
+     * trailing type suffix ({@code f}/{@code F}/{@code d}/{@code D}). With this option
+     * set those forms are rejected as invalid; XSD only permits a decimal number with
+     * an optional exponent, or the special values {@code INF}, {@code -INF} and
+     * {@code NaN}. The default value is false, so the long-standing lenient behaviour
+     * is unchanged unless this is set.
+     *
+     * @return this
+     * @since 5.4.0
+     */
+    public XmlOptions setLoadStrictFloatingPoint() {
+        return setLoadStrictFloatingPoint(true);
+    }
+
+    /**
+     * Sets whether xsd:float and xsd:double values are held to the XSD lexical
+     * space when parsing. See {@link #setLoadStrictFloatingPoint()}.
+     *
+     * @param b {@code true} to reject lexical forms outside the XSD float/double space
+     * @return this
+     * @since 5.4.0
+     */
+    public XmlOptions setLoadStrictFloatingPoint(boolean b) {
+        return set(XmlOptionsKeys.LOAD_STRICT_FLOATING_POINT, b);
+    }
+
+    /**
+     * Returns whether xsd:float and xsd:double values are held to the XSD lexical
+     * space when parsing. See {@link #setLoadStrictFloatingPoint()}.
+     *
+     * @return {@code true} if strict XSD float/double parsing is enabled
+     * @since 5.4.0
+     */
+    public boolean isLoadStrictFloatingPoint() {
+        return hasOption(XmlOptionsKeys.LOAD_STRICT_FLOATING_POINT);
+    }
+
     /**
      * Instructs the validator to skip elements matching an {@code }
      * particle with contentModel="lax". This is useful because,
diff --git a/src/main/java/org/apache/xmlbeans/impl/common/XmlLocale.java b/src/main/java/org/apache/xmlbeans/impl/common/XmlLocale.java
index 5f47a9f4b..8ced9416a 100755
--- a/src/main/java/org/apache/xmlbeans/impl/common/XmlLocale.java
+++ b/src/main/java/org/apache/xmlbeans/impl/common/XmlLocale.java
@@ -21,7 +21,12 @@ public interface XmlLocale {
 
     boolean sync ( );
     boolean noSync ( );
-    
+
     void enter ( );
     void exit ( );
-}
\ No newline at end of file
+
+    // whether lexFloat/lexDouble should reject lexical forms that are outside
+    // the xsd:float/xsd:double space (hex floats, the java "Infinity" token, a
+    // signed NaN and the f/F/d/D suffix). Driven by XmlOptions.setLoadStrictFloatingPoint.
+    default boolean isLoadStrictFloatingPoint ( ) { return false; }
+}
diff --git a/src/main/java/org/apache/xmlbeans/impl/store/Locale.java b/src/main/java/org/apache/xmlbeans/impl/store/Locale.java
index 702f9ba3f..02728b7fa 100755
--- a/src/main/java/org/apache/xmlbeans/impl/store/Locale.java
+++ b/src/main/java/org/apache/xmlbeans/impl/store/Locale.java
@@ -103,6 +103,8 @@ private Locale(SchemaTypeLoader stl, XmlOptions options) {
 
         _validateOnSet = options.isValidateOnSet();
 
+        _loadStrictFloatingPoint = options.isLoadStrictFloatingPoint();
+
         //
         // Check for Saaj implementation request
         //
@@ -2071,6 +2073,10 @@ public boolean sync() {
         return !_noSync;
     }
 
+    public boolean isLoadStrictFloatingPoint() {
+        return _loadStrictFloatingPoint;
+    }
+
     static boolean isWhiteSpace(String s) {
         int l = s.length();
 
@@ -2789,6 +2795,8 @@ public QName getQName(char[] uriSrc, int uriPos, int uriCch,
 
     boolean _validateOnSet;
 
+    boolean _loadStrictFloatingPoint;
+
     int _posTemp;
 
     nthCache _nthCache_A = new nthCache();
diff --git a/src/main/java/org/apache/xmlbeans/impl/util/XsTypeConverter.java b/src/main/java/org/apache/xmlbeans/impl/util/XsTypeConverter.java
index 4fb738c5a..5462b6c5f 100644
--- a/src/main/java/org/apache/xmlbeans/impl/util/XsTypeConverter.java
+++ b/src/main/java/org/apache/xmlbeans/impl/util/XsTypeConverter.java
@@ -39,8 +39,51 @@ public final class XsTypeConverter {
     private static final String[] URI_CHARS_TO_BE_REPLACED = {" ", "{", "}", "|", "\\", "^", "[", "]", "`"};
     private static final String[] URI_CHARS_REPLACED_WITH = {"%20", "%7b", "%7d", "%7c", "%5c", "%5e", "%5b", "%5d", "%60"};
 
+    // Float.parseFloat / Double.parseDouble accept lexical forms that are not
+    // in the XSD float/double lexical space: hexadecimal floats (0x1p4), the
+    // Java "Infinity" token, a signed NaN (-NaN, +NaN) and a trailing type
+    // suffix (f/F/d/D). XSD only allows a decimal number with an optional
+    // exponent, or the special values INF, -INF and NaN, and the callers match
+    // those three before this check runs. Anything else that is valid consists
+    // of digits, '.', a sign and 'e'/'E' only, so every char is checked against
+    // that set; XML whitespace is let through because the parser trims it, which
+    // also means a suffix followed by whitespace ("1.0f ") is still caught.
+    // This is only applied when strict floating point parsing is requested
+    // (XmlOptions.setLoadStrictFloatingPoint); the default stays lenient.
+    private static void checkFloatingPointLexical(CharSequence cs) {
+        final int len = cs.length();
+        for (int i = 0; i < len; i++) {
+            final char ch = cs.charAt(i);
+            final boolean allowed = (ch >= '0' && ch <= '9')
+                || ch == '.' || ch == '+' || ch == '-' || ch == 'e' || ch == 'E'
+                || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
+            if (!allowed) {
+                throw new NumberFormatException("invalid char '" + ch + "' in floating point value");
+            }
+        }
+    }
+
     // ======================== float ========================
     public static float lexFloat(CharSequence cs)
+        throws NumberFormatException {
+        return lexFloat(cs, false);
+    }
+
+    /**
+     * Parses an xsd:float lexical value.
+     *
+     * @param cs     the lexical value
+     * @param strict when {@code true}, lexical forms that {@link Float#parseFloat} accepts
+     *               but XSD does not are rejected: hexadecimal floats ({@code 0x1p4}), the
+     *               Java {@code Infinity} token, a signed {@code NaN}, and a trailing type
+     *               suffix ({@code f}/{@code F}/{@code d}/{@code D}). When {@code false} the
+     *               long-standing lenient behaviour applies. Driven by
+     *               {@link org.apache.xmlbeans.XmlOptions#setLoadStrictFloatingPoint()}.
+     * @return the parsed float
+     * @throws NumberFormatException if the value is not a valid xsd:float
+     * @since 5.4.0
+     */
+    public static float lexFloat(CharSequence cs, boolean strict)
         throws NumberFormatException {
         final String v = cs.toString();
         switch (v) {
@@ -50,18 +93,19 @@ public static float lexFloat(CharSequence cs)
                 return Float.NEGATIVE_INFINITY;
             case NAN_LEX:
                 return Float.NaN;
-            default:
-                //current jdk impl of parseFloat calls trim() on the string.
-                //Any other space is illegal anyway, whether there are one or more spaces.
-                //so no need to do a collapse pass through the string.
-                if (cs.length() > 1) {
-                    char ch = cs.charAt(cs.length() - 1);
-                    if ((ch == 'f' || ch == 'F') && cs.charAt(cs.length() - 2) != 'N') {
-                        throw new NumberFormatException("Invalid char '" + ch + "' in float.");
-                    }
-                }
-                return Float.parseFloat(v);
         }
+        //current jdk impl of parseFloat calls trim() on the string.
+        //Any other space is illegal anyway, whether there are one or more spaces.
+        //so no need to do a collapse pass through the string.
+        if (strict) {
+            checkFloatingPointLexical(cs);
+        } else if (cs.length() > 1) {
+            char ch = cs.charAt(cs.length() - 1);
+            if ((ch == 'f' || ch == 'F') && cs.charAt(cs.length() - 2) != 'N') {
+                throw new NumberFormatException("Invalid char '" + ch + "' in float.");
+            }
+        }
+        return Float.parseFloat(v);
     }
 
     public static float lexFloat(CharSequence cs, Collection errors) {
@@ -90,6 +134,25 @@ public static String printFloat(float value) {
 
     // ======================== double ========================
     public static double lexDouble(CharSequence cs)
+        throws NumberFormatException {
+        return lexDouble(cs, false);
+    }
+
+    /**
+     * Parses an xsd:double lexical value.
+     *
+     * @param cs     the lexical value
+     * @param strict when {@code true}, lexical forms that {@link Double#parseDouble} accepts
+     *               but XSD does not are rejected: hexadecimal floats ({@code 0x1p4}), the
+     *               Java {@code Infinity} token, a signed {@code NaN}, and a trailing type
+     *               suffix ({@code f}/{@code F}/{@code d}/{@code D}). When {@code false} the
+     *               long-standing lenient behaviour applies. Driven by
+     *               {@link org.apache.xmlbeans.XmlOptions#setLoadStrictFloatingPoint()}.
+     * @return the parsed double
+     * @throws NumberFormatException if the value is not a valid xsd:double
+     * @since 5.4.0
+     */
+    public static double lexDouble(CharSequence cs, boolean strict)
         throws NumberFormatException {
         final String v = cs.toString();
         switch (v) {
@@ -99,18 +162,19 @@ public static double lexDouble(CharSequence cs)
                 return Double.NEGATIVE_INFINITY;
             case NAN_LEX:
                 return Double.NaN;
-            default:
-                //current jdk impl of parseDouble calls trim() on the string.
-                //Any other space is illegal anyway, whether there are one or more spaces.
-                //so no need to do a collapse pass through the string.
-                if (cs.length() > 0) {
-                    char ch = cs.charAt(cs.length() - 1);
-                    if (ch == 'd' || ch == 'D') {
-                        throw new NumberFormatException("Invalid char '" + ch + "' in double.");
-                    }
-                }
-                return Double.parseDouble(v);
         }
+        //current jdk impl of parseDouble calls trim() on the string.
+        //Any other space is illegal anyway, whether there are one or more spaces.
+        //so no need to do a collapse pass through the string.
+        if (strict) {
+            checkFloatingPointLexical(cs);
+        } else if (cs.length() > 0) {
+            char ch = cs.charAt(cs.length() - 1);
+            if (ch == 'd' || ch == 'D') {
+                throw new NumberFormatException("Invalid char '" + ch + "' in double.");
+            }
+        }
+        return Double.parseDouble(v);
     }
 
     public static double lexDouble(CharSequence cs, Collection errors) {
diff --git a/src/main/java/org/apache/xmlbeans/impl/values/JavaDoubleHolder.java b/src/main/java/org/apache/xmlbeans/impl/values/JavaDoubleHolder.java
index 2ee47c13d..72759833c 100644
--- a/src/main/java/org/apache/xmlbeans/impl/values/JavaDoubleHolder.java
+++ b/src/main/java/org/apache/xmlbeans/impl/values/JavaDoubleHolder.java
@@ -52,12 +52,17 @@ public static String serialize(double d) {
     }
 
     protected void set_text(String s) {
-        set_double(validateLexical(s, _voorVc));
+        boolean strict = has_store() && get_store().get_locale().isLoadStrictFloatingPoint();
+        set_double(validateLexical(s, _voorVc, strict));
     }
 
     public static double validateLexical(String v, ValidationContext context) {
+        return validateLexical(v, context, false);
+    }
+
+    public static double validateLexical(String v, ValidationContext context, boolean strict) {
         try {
-            return XsTypeConverter.lexDouble(v);
+            return XsTypeConverter.lexDouble(v, strict);
         } catch (NumberFormatException e) {
             context.invalid(XmlErrorCodes.DOUBLE, new Object[]{v});
 
diff --git a/src/main/java/org/apache/xmlbeans/impl/values/JavaFloatHolder.java b/src/main/java/org/apache/xmlbeans/impl/values/JavaFloatHolder.java
index e2365c7cb..a24c18f7d 100644
--- a/src/main/java/org/apache/xmlbeans/impl/values/JavaFloatHolder.java
+++ b/src/main/java/org/apache/xmlbeans/impl/values/JavaFloatHolder.java
@@ -52,12 +52,17 @@ public static String serialize(float f) {
     }
 
     protected void set_text(String s) {
-        set_float(validateLexical(s, _voorVc));
+        boolean strict = has_store() && get_store().get_locale().isLoadStrictFloatingPoint();
+        set_float(validateLexical(s, _voorVc, strict));
     }
 
     public static float validateLexical(String v, ValidationContext context) {
+        return validateLexical(v, context, false);
+    }
+
+    public static float validateLexical(String v, ValidationContext context, boolean strict) {
         try {
-            return XsTypeConverter.lexFloat(v);
+            return XsTypeConverter.lexFloat(v, strict);
         } catch (NumberFormatException e) {
             context.invalid(XmlErrorCodes.FLOAT, new Object[]{v});
 
diff --git a/src/test/java/misc/checkin/XmlOptionsTest.java b/src/test/java/misc/checkin/XmlOptionsTest.java
index 622b7ac8e..e7ee411df 100644
--- a/src/test/java/misc/checkin/XmlOptionsTest.java
+++ b/src/test/java/misc/checkin/XmlOptionsTest.java
@@ -30,4 +30,14 @@ void testUnsynchronizedFlag() {
         xmlOptions.setUnsynchronized(false);
         assertFalse(xmlOptions.isUnsynchronized());
     }
+
+    @Test
+    void testLoadStrictFloatingPointFlag() {
+        XmlOptions xmlOptions = new XmlOptions();
+        assertFalse(xmlOptions.isLoadStrictFloatingPoint());
+        xmlOptions.setLoadStrictFloatingPoint();
+        assertTrue(xmlOptions.isLoadStrictFloatingPoint());
+        xmlOptions.setLoadStrictFloatingPoint(false);
+        assertFalse(xmlOptions.isLoadStrictFloatingPoint());
+    }
 }
diff --git a/src/test/java/misc/checkin/XsTypeConverterTest.java b/src/test/java/misc/checkin/XsTypeConverterTest.java
index cf22bcfeb..702d2233c 100644
--- a/src/test/java/misc/checkin/XsTypeConverterTest.java
+++ b/src/test/java/misc/checkin/XsTypeConverterTest.java
@@ -14,7 +14,11 @@
  */
 package misc.checkin;
 
+import org.apache.xmlbeans.XmlDouble;
+import org.apache.xmlbeans.XmlFloat;
+import org.apache.xmlbeans.XmlOptions;
 import org.apache.xmlbeans.impl.util.XsTypeConverter;
+import org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException;
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -75,6 +79,101 @@ void lexFloatAcceptsValidValues() {
         assertEquals(1.0f, XsTypeConverter.lexFloat("1.0"));
         assertEquals(Float.POSITIVE_INFINITY, XsTypeConverter.lexFloat("INF"));
         assertEquals(Float.NEGATIVE_INFINITY, XsTypeConverter.lexFloat("-INF"));
+        assertEquals(1500.0f, XsTypeConverter.lexFloat("1.5e3"));
+    }
+
+    @Test
+    void lexFloatLenientAcceptsJavaForms() {
+        // the default stays lenient: hex floats and the java "Infinity" spelling
+        // are accepted by Float.parseFloat, so lexFloat keeps accepting them
+        assertEquals(16.0f, XsTypeConverter.lexFloat("0x1p4"));
+        assertEquals(Float.POSITIVE_INFINITY, XsTypeConverter.lexFloat("Infinity"));
+    }
+
+    @Test
+    void lexFloatStrictRejectsNonXsdLexicalForms() {
+        // hex floats, the java "Infinity" spelling and the f/F/d/D suffix are
+        // accepted by Float.parseFloat but are outside the xsd:float lexical space
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("0x1p4", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("Infinity", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("-Infinity", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("1.0d", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("1D", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("1.0f", true));
+        // a signed NaN and a suffix followed by whitespace also get through
+        // Float.parseFloat, and are equally outside the xsd:float lexical space
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("-NaN", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("+NaN", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexFloat("1.0f ", true));
+    }
+
+    @Test
+    void lexFloatStrictAcceptsValidValues() {
+        assertEquals(1.0f, XsTypeConverter.lexFloat("1.0", true));
+        assertEquals(1500.0f, XsTypeConverter.lexFloat("1.5e3", true));
+        assertEquals(Float.POSITIVE_INFINITY, XsTypeConverter.lexFloat("INF", true));
+        assertEquals(Float.NEGATIVE_INFINITY, XsTypeConverter.lexFloat("-INF", true));
+        assertEquals(Float.NaN, XsTypeConverter.lexFloat("NaN", true));
+    }
+
+    @Test
+    void lexDoubleAcceptsValidValues() {
+        assertEquals(1.0, XsTypeConverter.lexDouble("1.0"));
+        assertEquals(Double.POSITIVE_INFINITY, XsTypeConverter.lexDouble("INF"));
+        assertEquals(Double.NEGATIVE_INFINITY, XsTypeConverter.lexDouble("-INF"));
+        assertEquals(1500.0, XsTypeConverter.lexDouble("1.5e3"));
+    }
+
+    @Test
+    void lexDoubleLenientAcceptsJavaForms() {
+        assertEquals(16.0, XsTypeConverter.lexDouble("0x1p4"));
+        assertEquals(Double.POSITIVE_INFINITY, XsTypeConverter.lexDouble("Infinity"));
+    }
+
+    @Test
+    void lexDoubleStrictRejectsNonXsdLexicalForms() {
+        // hex floats, the java "Infinity" spelling and the f/F/d/D suffix are
+        // accepted by Double.parseDouble but are outside the xsd:double lexical space
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("0x1p4", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("Infinity", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("-Infinity", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("1.0f", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("1F", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("1.0d", true));
+        // a signed NaN and a suffix followed by whitespace also get through
+        // Double.parseDouble, and are equally outside the xsd:double lexical space
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("-NaN", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("+NaN", true));
+        assertThrows(NumberFormatException.class, () -> XsTypeConverter.lexDouble("1.0d ", true));
+    }
+
+    @Test
+    void lexDoubleStrictAcceptsValidValues() {
+        assertEquals(1.0, XsTypeConverter.lexDouble("1.0", true));
+        assertEquals(1500.0, XsTypeConverter.lexDouble("1.5e3", true));
+        assertEquals(Double.POSITIVE_INFINITY, XsTypeConverter.lexDouble("INF", true));
+        assertEquals(Double.NEGATIVE_INFINITY, XsTypeConverter.lexDouble("-INF", true));
+        assertEquals(Double.NaN, XsTypeConverter.lexDouble("NaN", true));
+    }
+
+    @Test
+    void loadStrictFloatingPointOptionGatesFloatParsing() throws Exception {
+        // default load is lenient: the hex float parses as it always has
+        assertEquals(16.0f, XmlFloat.Factory.parse("0x1p4").getFloatValue());
+
+        // with the option set, the value is out of the xsd:float lexical space
+        XmlOptions strict = new XmlOptions().setLoadStrictFloatingPoint();
+        assertThrows(XmlValueOutOfRangeException.class, () ->
+            XmlFloat.Factory.parse("0x1p4", strict).getFloatValue());
+    }
+
+    @Test
+    void loadStrictFloatingPointOptionGatesDoubleParsing() throws Exception {
+        assertEquals(16.0, XmlDouble.Factory.parse("0x1p4").getDoubleValue());
+
+        XmlOptions strict = new XmlOptions().setLoadStrictFloatingPoint();
+        assertThrows(XmlValueOutOfRangeException.class, () ->
+            XmlDouble.Factory.parse("0x1p4", strict).getDoubleValue());
     }
 
     @Test