diff --git a/core/src/main/java/org/apache/datafusion/ExceptionVerbosity.java b/core/src/main/java/org/apache/datafusion/ExceptionVerbosity.java new file mode 100644 index 0000000..ef57824 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/ExceptionVerbosity.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * Controls how detailed the representation of a Java throwable is when a JVM upcall (UDF, {@link + * TableProvider}) throws and the failure has to cross back into native code. + * + *

Set on {@link SessionContextBuilder#exceptionVerbosity}. The setting is locked at + * session-construction time and applies uniformly to every UDF and table provider registered + * against the session. + */ +public enum ExceptionVerbosity { + /** Class name + {@code getMessage()} + standard Java stack trace. The default. */ + FULL, + /** + * Class name + {@code getMessage()} only. Matches the behaviour of `datafusion-java` before this + * setting existed; useful when full traces are too verbose for production logs. + */ + MESSAGE, + /** + * Class name only. For callers who treat exception bodies as untrusted user input that shouldn't + * reach logs. + */ + NONE; + + /** + * Byte tag passed through {@code SessionContext.registerScalarUdf} / {@code registerTableNative} + * to the native side. {@code 1=FULL}, {@code 2=MESSAGE}, {@code 3=NONE}; {@code 0} is reserved as + * a "bogus" sentinel and rejected by the Rust decoder. + */ + byte toByte() { + switch (this) { + case FULL: + return 1; + case MESSAGE: + return 2; + case NONE: + return 3; + default: + throw new IllegalStateException("unreachable: unknown ExceptionVerbosity " + this); + } + } +} diff --git a/core/src/main/java/org/apache/datafusion/SessionContext.java b/core/src/main/java/org/apache/datafusion/SessionContext.java index ffc58dd..c459c25 100644 --- a/core/src/main/java/org/apache/datafusion/SessionContext.java +++ b/core/src/main/java/org/apache/datafusion/SessionContext.java @@ -49,19 +49,27 @@ public final class SessionContext implements AutoCloseable { } private long nativeHandle; + // Snapshot of the builder's exceptionVerbosity, passed through to UDF / + // TableProvider registrations. Locked at construction time; not mutable. + // Default FULL keeps stack traces flowing for callers that don't touch the + // setter. Stored as a byte to keep the JNI signature ABI-stable across any + // future ExceptionVerbosity additions. + private final byte exceptionVerbosity; public SessionContext() { this.nativeHandle = createSessionContext(); if (this.nativeHandle == 0) { throw new RuntimeException("Failed to create native SessionContext"); } + this.exceptionVerbosity = ExceptionVerbosity.FULL.toByte(); } - SessionContext(byte[] optionsBytes) { + SessionContext(byte[] optionsBytes, ExceptionVerbosity exceptionVerbosity) { this.nativeHandle = createSessionContextWithOptions(optionsBytes); if (this.nativeHandle == 0) { throw new RuntimeException("Failed to create native SessionContext"); } + this.exceptionVerbosity = exceptionVerbosity.toByte(); } /** Start configuring a {@link SessionContext}. */ @@ -550,7 +558,8 @@ public void registerUdf(ScalarUdf udf) { fields.addAll(udf.argFields()); Schema signatureSchema = new Schema(fields); byte[] signatureBytes = serializeSchemaIpc(signatureSchema); - registerScalarUdf(nativeHandle, name, signatureBytes, volatility.code(), impl); + registerScalarUdf( + nativeHandle, name, signatureBytes, volatility.code(), impl, exceptionVerbosity); } /** @@ -585,7 +594,7 @@ public void registerTable(String name, TableProvider provider) { throw new IllegalStateException("TableProvider.schema returned null"); } byte[] schemaIpc = serializeSchemaIpc(schema); - registerTableNative(nativeHandle, name, schemaIpc, provider); + registerTableNative(nativeHandle, name, schemaIpc, provider, exceptionVerbosity); } private static byte[] serializeSchemaIpc(Schema schema) { @@ -660,8 +669,17 @@ private static native long readJsonWithOptions( private static native void closeSessionContext(long handle); private static native void registerScalarUdf( - long handle, String name, byte[] signatureSchemaBytes, byte volatility, ScalarFunction impl); + long handle, + String name, + byte[] signatureSchemaBytes, + byte volatility, + ScalarFunction impl, + byte exceptionVerbosity); private static native void registerTableNative( - long handle, String name, byte[] schemaIpcBytes, TableProvider provider); + long handle, + String name, + byte[] schemaIpcBytes, + TableProvider provider, + byte exceptionVerbosity); } diff --git a/core/src/main/java/org/apache/datafusion/SessionContextBuilder.java b/core/src/main/java/org/apache/datafusion/SessionContextBuilder.java index 81c59de..25ead62 100644 --- a/core/src/main/java/org/apache/datafusion/SessionContextBuilder.java +++ b/core/src/main/java/org/apache/datafusion/SessionContextBuilder.java @@ -43,6 +43,7 @@ public final class SessionContextBuilder { private String tempDirectory; private boolean spillDisabled; private Long maxTempDirectorySize; + private ExceptionVerbosity exceptionVerbosity; private CacheManagerOptions cacheManager; private final LinkedHashMap options = new LinkedHashMap<>(); private final List objectStores = new ArrayList<>(); @@ -208,6 +209,24 @@ public SessionContextBuilder setOptions(Map entries) { return this; } + /** + * Configure how detailed the representation of a Java throwable is when a JVM upcall (UDF, {@link + * TableProvider}) throws and the failure has to cross back into native code. + * + *

The setting is locked at session-construction time and applies uniformly to every UDF and + * table provider registered against the session. See {@link ExceptionVerbosity} for the three + * values; default (unset) is {@link ExceptionVerbosity#FULL}. + * + * @throws IllegalArgumentException if {@code verbosity} is {@code null}. + */ + public SessionContextBuilder exceptionVerbosity(ExceptionVerbosity verbosity) { + if (verbosity == null) { + throw new IllegalArgumentException("exceptionVerbosity must be non-null"); + } + this.exceptionVerbosity = verbosity; + return this; + } + /** * Configure DataFusion's built-in {@code CacheManager} for the new context. Build the {@link * CacheManagerOptions} via {@link CacheManagerOptions#builder()}; each cache slot is independent, @@ -264,7 +283,7 @@ public SessionContext build() { throw new IllegalStateException( "disableSpill() is mutually exclusive with tempDirectory(...)"); } - return new SessionContext(toBytes()); + return new SessionContext(toBytes(), exceptionVerbosityOrDefault()); } byte[] toBytes() { @@ -306,6 +325,9 @@ byte[] toBytes() { if (cacheManager != null) { b.setCacheManager(cacheManager.toProto()); } + // exceptionVerbosity flows via SessionContext's snapshot field, not the + // SessionOptions proto -- it's only consumed at registerUdf / + // registerTable JNI calls, where it rides on its own arg. for (Map.Entry e : options.entrySet()) { b.addOptions(ConfigOption.newBuilder().setKey(e.getKey()).setValue(e.getValue()).build()); } @@ -314,4 +336,9 @@ byte[] toBytes() { } return b.build().toByteArray(); } + + /** Visible for {@link SessionContext} so it can fall back to the FULL byte when unset. */ + ExceptionVerbosity exceptionVerbosityOrDefault() { + return exceptionVerbosity != null ? exceptionVerbosity : ExceptionVerbosity.FULL; + } } diff --git a/core/src/test/java/org/apache/datafusion/ExceptionVerbosityTest.java b/core/src/test/java/org/apache/datafusion/ExceptionVerbosityTest.java new file mode 100644 index 0000000..22b121c --- /dev/null +++ b/core/src/test/java/org/apache/datafusion/ExceptionVerbosityTest.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.jupiter.api.Test; + +/** Drives the three {@link ExceptionVerbosity} settings end-to-end through a throwing UDF. */ +class ExceptionVerbosityTest { + + /** + * Sentinel string we look for in the formatted output. Lives on the throwing UDF's call frame, so + * a {@code FULL} stack trace must mention this method by name. + */ + private static final String STACK_FRAME_MARKER = "ExceptionVerbosityTest$BoomUdf.evaluate"; + + /** Sentinel message the UDF throws; used to check {@code MESSAGE} / {@code FULL} verbosity. */ + private static final String USER_MESSAGE = "boom-from-test-12345"; + + private static final ArrowType.Int INT32 = new ArrowType.Int(32, true); + + static final class BoomUdf implements ScalarFunction { + @Override + public String name() { + return "boom"; + } + + @Override + public List argFields() { + return List.of(new Field("x", FieldType.nullable(INT32), null)); + } + + @Override + public Field returnField() { + return new Field("y", FieldType.nullable(INT32), null); + } + + @Override + public Volatility volatility() { + return Volatility.IMMUTABLE; + } + + @Override + public ColumnarValue evaluate(BufferAllocator allocator, ScalarFunctionArgs args) { + throw new IllegalStateException(USER_MESSAGE); + } + } + + private static ScalarUdf newBoomUdf() { + return new ScalarUdf(new BoomUdf()); + } + + // ---- builder + enum surface ------------------------------------------ + + @Test + void enumValuesAreFullMessageNone() { + // Pin the enum so a future rename / reorder is a deliberate decision, not + // a silent bytecode-level change for the JNI byte tag. + assertEquals(3, ExceptionVerbosity.values().length); + assertEquals(ExceptionVerbosity.FULL, ExceptionVerbosity.valueOf("FULL")); + assertEquals(ExceptionVerbosity.MESSAGE, ExceptionVerbosity.valueOf("MESSAGE")); + assertEquals(ExceptionVerbosity.NONE, ExceptionVerbosity.valueOf("NONE")); + } + + @Test + void builderRejectsNullVerbosity() { + assertThrows( + IllegalArgumentException.class, () -> SessionContext.builder().exceptionVerbosity(null)); + } + + @Test + void builderAcceptsAllThreeAndReturnsContext() { + for (ExceptionVerbosity v : ExceptionVerbosity.values()) { + try (SessionContext ctx = SessionContext.builder().exceptionVerbosity(v).build()) { + assertNotNull(ctx); + } + } + } + + // ---- end-to-end UDF routing ------------------------------------------ + + @Test + void defaultIsFullStackTrace() { + // No setter call -- the no-arg constructor and builder-default code paths + // both have to default to FULL. + try (SessionContext ctx = new SessionContext()) { + String message = runBoomUdfMessageOn(ctx); + assertContains("default verbosity (no-arg ctor)", message, "IllegalStateException"); + assertContains("default verbosity (no-arg ctor)", message, USER_MESSAGE); + assertContains("default verbosity (no-arg ctor)", message, STACK_FRAME_MARKER); + } + String message = runBoomUdfMessage(SessionContext.builder()); + assertContains("default verbosity (builder)", message, "IllegalStateException"); + assertContains("default verbosity (builder)", message, USER_MESSAGE); + assertContains("default verbosity (builder)", message, STACK_FRAME_MARKER); + } + + @Test + void fullIncludesClassMessageAndStackTrace() { + String message = + runBoomUdfMessage(SessionContext.builder().exceptionVerbosity(ExceptionVerbosity.FULL)); + assertContains("FULL verbosity", message, "IllegalStateException"); + assertContains("FULL verbosity", message, USER_MESSAGE); + assertContains("FULL verbosity", message, STACK_FRAME_MARKER); + } + + @Test + void messageIncludesClassAndMessageButNoStackTrace() { + String message = + runBoomUdfMessage(SessionContext.builder().exceptionVerbosity(ExceptionVerbosity.MESSAGE)); + assertContains("MESSAGE verbosity", message, "IllegalStateException"); + assertContains("MESSAGE verbosity", message, USER_MESSAGE); + assertDoesNotContain("MESSAGE verbosity", message, STACK_FRAME_MARKER); + // The "\n\tat ..." prefix is the canonical Java stack-trace marker; it + // must not appear anywhere in the MESSAGE-verbosity output. + assertDoesNotContain("MESSAGE verbosity", message, "\n\tat "); + } + + @Test + void noneIncludesClassOnly() { + String message = + runBoomUdfMessage(SessionContext.builder().exceptionVerbosity(ExceptionVerbosity.NONE)); + assertContains("NONE verbosity", message, "IllegalStateException"); + assertDoesNotContain("NONE verbosity", message, USER_MESSAGE); + assertDoesNotContain("NONE verbosity", message, STACK_FRAME_MARKER); + } + + // ---- TableProvider path ---------------------------------------------- + + /** TableProvider whose scan() throws a marker-message exception. */ + static final class BoomTableProvider implements TableProvider { + @Override + public org.apache.arrow.vector.types.pojo.Schema schema() { + return new org.apache.arrow.vector.types.pojo.Schema( + List.of(new Field("id", FieldType.nullable(INT32), null))); + } + + @Override + public org.apache.arrow.vector.ipc.ArrowReader scan(BufferAllocator allocator) { + throw new IllegalStateException(USER_MESSAGE); + } + } + + private static final String TP_FRAME_MARKER = "ExceptionVerbosityTest$BoomTableProvider.scan"; + + @Test + void tableProviderFullStackTrace() { + String message = + runBoomTableScanMessage( + SessionContext.builder().exceptionVerbosity(ExceptionVerbosity.FULL)); + assertContains("TP FULL", message, "IllegalStateException"); + assertContains("TP FULL", message, USER_MESSAGE); + assertContains("TP FULL", message, TP_FRAME_MARKER); + } + + @Test + void tableProviderMessageOnly() { + String message = + runBoomTableScanMessage( + SessionContext.builder().exceptionVerbosity(ExceptionVerbosity.MESSAGE)); + assertContains("TP MESSAGE", message, "IllegalStateException"); + assertContains("TP MESSAGE", message, USER_MESSAGE); + assertDoesNotContain("TP MESSAGE", message, TP_FRAME_MARKER); + } + + @Test + void tableProviderClassOnly() { + String message = + runBoomTableScanMessage( + SessionContext.builder().exceptionVerbosity(ExceptionVerbosity.NONE)); + assertContains("TP NONE", message, "IllegalStateException"); + assertDoesNotContain("TP NONE", message, USER_MESSAGE); + assertDoesNotContain("TP NONE", message, TP_FRAME_MARKER); + } + + private static String runBoomTableScanMessage(SessionContextBuilder b) { + try (SessionContext ctx = b.build(); + BufferAllocator allocator = new RootAllocator()) { + ctx.registerTable("boom_tp", new BoomTableProvider()); + RuntimeException ex = + assertThrows( + RuntimeException.class, + () -> { + try (DataFrame df = ctx.sql("SELECT id FROM boom_tp"); + ArrowReader r = df.collect(allocator)) { + while (r.loadNextBatch()) {} + } + }); + String message = ex.getMessage(); + assertNotNull(message, "expected exception to carry a message"); + return message; + } + } + + // ---- helpers ---------------------------------------------------------- + + /** Convenience: build a context off the supplied builder, run the UDF, return the message. */ + private static String runBoomUdfMessage(SessionContextBuilder b) { + try (SessionContext ctx = b.build()) { + return runBoomUdfMessageOn(ctx); + } + } + + /** Register the BoomUdf on {@code ctx} and capture the {@link RuntimeException} message. */ + private static String runBoomUdfMessageOn(SessionContext ctx) { + try (BufferAllocator allocator = new RootAllocator()) { + ctx.registerUdf(newBoomUdf()); + RuntimeException ex = + assertThrows( + RuntimeException.class, + () -> { + try (DataFrame df = ctx.sql("SELECT boom(CAST(1 AS INT))"); + ArrowReader r = df.collect(allocator)) { + while (r.loadNextBatch()) {} + } + }); + String message = ex.getMessage(); + assertNotNull(message, "expected exception to carry a message"); + return message; + } + } + + private static void assertContains(String label, String haystack, String needle) { + assertTrue( + haystack != null && haystack.contains(needle), + label + ": expected to contain \"" + needle + "\", got: " + haystack); + } + + private static void assertDoesNotContain(String label, String haystack, String needle) { + assertFalse( + haystack != null && haystack.contains(needle), + label + ": expected to NOT contain \"" + needle + "\", got: " + haystack); + } +} diff --git a/docs/source/user-guide/scalar-udf.md b/docs/source/user-guide/scalar-udf.md index 55f2d24..041029c 100644 --- a/docs/source/user-guide/scalar-udf.md +++ b/docs/source/user-guide/scalar-udf.md @@ -167,12 +167,26 @@ non-deterministic functions. ## Errors -If the UDF throws, the exception class and message surface in the -`RuntimeException` raised from `collect()`. If the returned `ColumnarValue` is -`null`, an Array result's vector length does not equal `args.rowCount()`, or -the result's Arrow type differs from the declared return field, the runtime -raises a `RuntimeException` with a descriptive message. A Scalar result whose -vector is not length-1 is rejected at the `ColumnarValue.scalar` factory. +If the UDF throws, the exception class, message, and Java stack trace surface +in the `RuntimeException` raised from `collect()`. The default verbosity is +`FULL` (class + message + stack trace, in `Throwable.printStackTrace` format); +configure it on the session if a less-verbose representation is preferred: + +```java +SessionContext.builder() + .exceptionVerbosity(ExceptionVerbosity.MESSAGE) // class + message only + .build(); +``` + +`ExceptionVerbosity.NONE` surfaces only the exception class. The verbosity is +locked at session-construction time and applies to every UDF and table +provider registered against the session. + +If the returned `ColumnarValue` is `null`, an Array result's vector length +does not equal `args.rowCount()`, or the result's Arrow type differs from the +declared return field, the runtime raises a `RuntimeException` with a +descriptive message. A Scalar result whose vector is not length-1 is rejected +at the `ColumnarValue.scalar` factory. ## Threading diff --git a/docs/source/user-guide/table-provider.md b/docs/source/user-guide/table-provider.md index 7eed07d..1129c1d 100644 --- a/docs/source/user-guide/table-provider.md +++ b/docs/source/user-guide/table-provider.md @@ -98,8 +98,10 @@ try (SessionContext ctx = new SessionContext(); Exceptions thrown from `scan()` or from the returned reader surface in the `RuntimeException` raised by `collect()`. The error message includes the Java -exception class and `getMessage()`, in the same format used for scalar UDF -errors. +exception class, `getMessage()`, and (at the default `FULL` verbosity) the +Java stack trace. See [scalar-udf.md → Errors](scalar-udf.md#errors) for the +session-wide verbosity setter — it applies uniformly to UDFs and table +providers. ## Threading diff --git a/examples/src/main/java/org/apache/datafusion/examples/ExceptionVerbosityExample.java b/examples/src/main/java/org/apache/datafusion/examples/ExceptionVerbosityExample.java new file mode 100644 index 0000000..e2e1793 --- /dev/null +++ b/examples/src/main/java/org/apache/datafusion/examples/ExceptionVerbosityExample.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.examples; + +import java.util.List; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.datafusion.ColumnarValue; +import org.apache.datafusion.DataFrame; +import org.apache.datafusion.ExceptionVerbosity; +import org.apache.datafusion.ScalarFunction; +import org.apache.datafusion.ScalarFunctionArgs; +import org.apache.datafusion.ScalarUdf; +import org.apache.datafusion.SessionContext; +import org.apache.datafusion.Volatility; + +/** + * Demonstrates the three {@link ExceptionVerbosity} settings on the same throwing UDF. The same + * {@code BoomUdf} is invoked under each verbosity; the exception message attached to the resulting + * {@link RuntimeException} differs in detail. + * + *

Run with: + * + *

+ * ./mvnw -pl :datafusion-java-examples exec:exec \
+ *     -Dexec.mainClass=org.apache.datafusion.examples.ExceptionVerbosityExample
+ * 
+ */ +public final class ExceptionVerbosityExample { + + /** UDF that always throws. The message and stack frame are what the example surfaces. */ + static final class BoomUdf implements ScalarFunction { + @Override + public String name() { + return "boom"; + } + + @Override + public List argFields() { + return List.of(Field.nullable("x", new ArrowType.Int(32, true))); + } + + @Override + public Field returnField() { + return Field.nullable("y", new ArrowType.Int(32, true)); + } + + @Override + public Volatility volatility() { + return Volatility.IMMUTABLE; + } + + @Override + public ColumnarValue evaluate(BufferAllocator allocator, ScalarFunctionArgs args) { + throw new IllegalStateException("user-supplied input failed validation"); + } + } + + public static void main(String[] args) { + for (ExceptionVerbosity v : ExceptionVerbosity.values()) { + runWith(v); + } + } + + private static void runWith(ExceptionVerbosity verbosity) { + System.out.println("=== ExceptionVerbosity." + verbosity + " ==="); + try (SessionContext ctx = SessionContext.builder().exceptionVerbosity(verbosity).build(); + BufferAllocator allocator = new RootAllocator()) { + ctx.registerUdf(new ScalarUdf(new BoomUdf())); + try (DataFrame df = ctx.sql("SELECT boom(CAST(1 AS INT))"); + ArrowReader reader = df.collect(allocator)) { + while (reader.loadNextBatch()) { + // The UDF throws; we never reach this line. + } + } catch (RuntimeException e) { + System.out.println(e.getMessage()); + } catch (Exception e) { + // ArrowReader.close() is declared to throw; treat the same. + System.out.println(e.getMessage()); + } + } + System.out.println(); + } + + private ExceptionVerbosityExample() {} +} diff --git a/native/src/jni_util.rs b/native/src/jni_util.rs index daa2b63..84768c6 100644 --- a/native/src/jni_util.rs +++ b/native/src/jni_util.rs @@ -18,11 +18,44 @@ //! Small shared helpers for JNI call sites. use jni::objects::JThrowable; +use jni::sys::jbyte; use jni::JNIEnv; -/// Best-effort: extract class name and `getMessage()` from a Java throwable. -/// Anything that goes wrong collapses to a generic message so we don't -/// double-throw inside an error path. +/// Verbosity of the throwable representation produced when a JVM upcall +/// (UDF, TableProvider) throws. Mirrors the public Java `ExceptionVerbosity` +/// enum 1:1; the byte tag is what crosses the JNI boundary on +/// `registerScalarUdf` / `registerTableNative`. Byte 0 is reserved as a +/// "bogus" sentinel (rejected by `from_byte`) to make accidental zero-init +/// surface as a clear error rather than silently picking FULL. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) enum ExceptionVerbosity { + /// Class name + message + Java stack trace. + Full, + /// Class name + message only (matches pre-#55 behaviour). + Message, + /// Class name only. + None, +} + +impl ExceptionVerbosity { + /// Decode the byte tag the Java side passes through `register*` JNI calls. + /// The mapping matches the proto enum: 1 = FULL, 2 = MESSAGE, 3 = NONE. + /// Anything else (including 0 = UNSPECIFIED) returns an error so the + /// caller can surface a clear "this byte is bogus" diagnostic instead of + /// silently defaulting to FULL. + pub(crate) fn from_byte(b: jbyte) -> Result { + match b { + 1 => Ok(Self::Full), + 2 => Ok(Self::Message), + 3 => Ok(Self::None), + _ => Err(format!("invalid ExceptionVerbosity byte: {b}")), + } + } +} + +/// Best-effort: render a Java throwable as a string at the requested +/// verbosity. Anything that goes wrong while inspecting the throwable +/// collapses to a placeholder so we don't double-throw inside an error path. /// /// `kind` and `name` are used to build the surfaced error message /// (e.g., `kind="UDF" name="add_one"` -> `"Java UDF 'add_one' threw ..."`). @@ -31,8 +64,45 @@ pub(crate) fn jthrowable_to_string( throwable: &JThrowable, kind: &str, name: &str, + verbosity: ExceptionVerbosity, ) -> String { - let class_name_result = (|| -> jni::errors::Result { + let class_name = jthrowable_class_name(env, throwable); + + if verbosity == ExceptionVerbosity::None { + return format!("Java {} '{}' threw {}", kind, name, class_name); + } + + let message = jthrowable_message(env, throwable); + let header = match message { + Some(m) => format!("Java {} '{}' threw {}: {}", kind, name, class_name, m), + None => format!("Java {} '{}' threw {}", kind, name, class_name), + }; + + if verbosity == ExceptionVerbosity::Message { + return header; + } + + // Full verbosity: append the Java-formatted stack trace. We delegate to + // `Throwable.printStackTrace(PrintWriter)` so the format matches every + // standard Java logger (header line + "\tat fqn(File:line)" frames). + match jthrowable_stack_trace(env, throwable) { + Ok(trace) => format!("{}\n{}", header, trace.trim_end()), + Err(_) => { + // Trace rendering can leave a pending JNI exception (e.g. a + // custom `Throwable.printStackTrace` overload that itself + // throws, or OOM during StringWriter/PrintWriter allocation). + // Clear it before returning -- a stale pending exception would + // poison the next JNI call and cause try_unwrap_or_throw to + // surface the secondary exception instead of the original + // DataFusionError. + env.exception_clear().ok(); + header + } + } +} + +fn jthrowable_class_name(env: &mut JNIEnv, throwable: &JThrowable) -> String { + let result = (|| -> jni::errors::Result { let class = env.call_method(throwable, "getClass", "()Ljava/lang/Class;", &[])?; let class_obj = class.l()?; let n = env.call_method(&class_obj, "getName", "()Ljava/lang/String;", &[])?; @@ -40,30 +110,47 @@ pub(crate) fn jthrowable_to_string( let n_str: String = env.get_string(&n_obj.into())?.into(); Ok(n_str) })(); - let class_name = match class_name_result { + match result { Ok(s) => s, Err(_) => { env.exception_clear().ok(); "".to_string() } - }; + } +} - let message_result = (|| -> jni::errors::Result { +fn jthrowable_message(env: &mut JNIEnv, throwable: &JThrowable) -> Option { + let result = (|| -> jni::errors::Result> { let msg = env.call_method(throwable, "getMessage", "()Ljava/lang/String;", &[])?; let msg_obj = msg.l()?; if msg_obj.is_null() { - return Ok("".to_string()); + return Ok(None); } let s: String = env.get_string(&msg_obj.into())?.into(); - Ok(s) + Ok(Some(s)) })(); - let message = match message_result { - Ok(s) => s, + match result { + Ok(opt) => opt, Err(_) => { env.exception_clear().ok(); - "".to_string() + None } - }; + } +} - format!("Java {} '{}' threw {}: {}", kind, name, class_name, message) +fn jthrowable_stack_trace(env: &mut JNIEnv, throwable: &JThrowable) -> jni::errors::Result { + let sw = env.new_object("java/io/StringWriter", "()V", &[])?; + let pw_args = [(&sw).into()]; + let pw = env.new_object("java/io/PrintWriter", "(Ljava/io/Writer;)V", &pw_args)?; + let trace_args = [(&pw).into()]; + env.call_method( + throwable, + "printStackTrace", + "(Ljava/io/PrintWriter;)V", + &trace_args, + )?; + let s = env.call_method(&sw, "toString", "()Ljava/lang/String;", &[])?; + let s_obj = s.l()?; + let s_str: String = env.get_string(&s_obj.into())?.into(); + Ok(s_str) } diff --git a/native/src/lib.rs b/native/src/lib.rs index 4fd7a8a..f901f3f 100644 --- a/native/src/lib.rs +++ b/native/src/lib.rs @@ -1204,6 +1204,7 @@ pub extern "system" fn Java_org_apache_datafusion_SessionContext_registerScalarU signature_schema_bytes: JByteArray<'local>, volatility: jni::sys::jbyte, udf: JObject<'local>, + exception_verbosity: jni::sys::jbyte, ) { try_unwrap_or_throw(&mut env, (), |env| -> JniResult<()> { if handle == 0 { @@ -1240,6 +1241,8 @@ pub extern "system" fn Java_org_apache_datafusion_SessionContext_registerScalarU "(Lorg/apache/datafusion/ScalarFunction;JJJJ[BJJI)B", )?; + let verbosity = crate::jni_util::ExceptionVerbosity::from_byte(exception_verbosity)?; + let java_udf = crate::udf::JavaScalarUdf { name: name.clone(), signature, @@ -1247,6 +1250,7 @@ pub extern "system" fn Java_org_apache_datafusion_SessionContext_registerScalarU udf_global_ref, bridge_class, invoke_method, + verbosity, }; ctx.register_udf(ScalarUDF::new_from_impl(java_udf)); Ok(()) @@ -1261,6 +1265,7 @@ pub extern "system" fn Java_org_apache_datafusion_SessionContext_registerTableNa name: JString<'local>, schema_ipc_bytes: JByteArray<'local>, provider: JObject<'local>, + exception_verbosity: jni::sys::jbyte, ) { try_unwrap_or_throw(&mut env, (), |env| -> JniResult<()> { if handle == 0 { @@ -1283,12 +1288,15 @@ pub extern "system" fn Java_org_apache_datafusion_SessionContext_registerTableNa "(Lorg/apache/datafusion/TableProvider;J)V", )?; + let verbosity = crate::jni_util::ExceptionVerbosity::from_byte(exception_verbosity)?; + let java_tp = crate::table_provider::JavaTableProvider { name: name.clone(), schema, source_global_ref, bridge_class, invoke_method, + verbosity, }; let _ = ctx.register_table(name.as_str(), Arc::new(java_tp))?; Ok(()) diff --git a/native/src/table_provider.rs b/native/src/table_provider.rs index 70eaac2..4f887b9 100644 --- a/native/src/table_provider.rs +++ b/native/src/table_provider.rs @@ -47,7 +47,7 @@ use jni::objects::{GlobalRef, JStaticMethodID}; use jni::signature::{Primitive, ReturnType}; use jni::sys::{jlong, jvalue}; -use crate::jni_util::jthrowable_to_string; +use crate::jni_util::{jthrowable_to_string, ExceptionVerbosity}; pub(crate) struct JavaTableProvider { pub(crate) name: String, @@ -55,6 +55,9 @@ pub(crate) struct JavaTableProvider { pub(crate) source_global_ref: Arc, pub(crate) bridge_class: Arc, pub(crate) invoke_method: JStaticMethodID, + /// Verbosity applied when `TableProvider.scan` throws. Locked at + /// registration time; same flow as `JavaScalarUdf.verbosity`. + pub(crate) verbosity: ExceptionVerbosity, } // SAFETY: see the matching unsafe impls on JavaScalarUdf. The GlobalRefs keep @@ -111,6 +114,7 @@ impl TableProvider for JavaTableProvider { source_global_ref: Arc::clone(&self.source_global_ref), bridge_class: Arc::clone(&self.bridge_class), invoke_method: self.invoke_method, + verbosity: self.verbosity, plan_properties, })) } @@ -124,6 +128,7 @@ pub(crate) struct JavaScanExec { source_global_ref: Arc, bridge_class: Arc, invoke_method: JStaticMethodID, + verbosity: ExceptionVerbosity, plan_properties: Arc, } @@ -226,6 +231,7 @@ impl ExecutionPlan for JavaScanExec { &throwable, "TableProvider", &self.name, + self.verbosity, ))); } diff --git a/native/src/udf.rs b/native/src/udf.rs index da7b260..464a19b 100644 --- a/native/src/udf.rs +++ b/native/src/udf.rs @@ -46,6 +46,10 @@ pub(crate) struct JavaScalarUdf { pub(crate) bridge_class: GlobalRef, /// Method ID for `JniBridge.invokeScalarUdf`. pub(crate) invoke_method: JStaticMethodID, + /// Verbosity to apply when this UDF's `evaluate` throws. Snapshotted from + /// the registering `SessionContext` -- locked at registration time, like + /// the rest of this struct. + pub(crate) verbosity: crate::jni_util::ExceptionVerbosity, } // SAFETY: JStaticMethodID is a JNI handle that's safe to share because the @@ -243,8 +247,13 @@ impl ScalarUDFImpl for JavaScalarUdf { DataFusionError::Execution(format!("exception_occurred failed: {}", e)) })?; env.exception_clear().ok(); - let message = - crate::jni_util::jthrowable_to_string(&mut env, &throwable, "UDF", &self.name); + let message = crate::jni_util::jthrowable_to_string( + &mut env, + &throwable, + "UDF", + &self.name, + self.verbosity, + ); return Err(DataFusionError::Execution(message)); }