From 9c0a0a72a0d09cbc6ec7215417e379ba413b8bda Mon Sep 17 00:00:00 2001 From: Marco Grassi Date: Mon, 29 Jun 2026 10:55:58 +0200 Subject: [PATCH] feat: add getDataframeWithMetadata for RDFReader Closes #62. Add support for returning SPARQL SELECT binding metadata according to the SPARQL JSON Results format. Each binding now includes its value, RDF term type (URI, blank node, or literal), datatype (e.g., xsd:string), and language tag when applicable. This functionality is exposed through a new method rather than getDataframe(), preserving backward compatibility while making the feature opt-in. --- .../cefriel/template/io/rdf/RDFReader.java | 53 ++++++++++ .../com/cefriel/template/RDFReaderTests.java | 98 +++++++++++++++++++ src/test/resources/metadata/input.ttl | 15 +++ src/test/resources/metadata/output.ttl | 15 +++ src/test/resources/metadata/template.vm | 10 ++ 5 files changed, 191 insertions(+) create mode 100644 src/test/resources/metadata/input.ttl create mode 100644 src/test/resources/metadata/output.ttl create mode 100644 src/test/resources/metadata/template.vm diff --git a/src/main/java/com/cefriel/template/io/rdf/RDFReader.java b/src/main/java/com/cefriel/template/io/rdf/RDFReader.java index 4f2f35b..820ff75 100644 --- a/src/main/java/com/cefriel/template/io/rdf/RDFReader.java +++ b/src/main/java/com/cefriel/template/io/rdf/RDFReader.java @@ -170,6 +170,59 @@ private List> getQueryResultsStringValue(String query) { return new ArrayList<>(dataframe); } + /** + * Executes a SPARQL query returning a list of rows as {@code List<Map<String, Map<String, String>>>}. + * Each element of the list is a row mapping every variable name to a metadata map describing the + * bound value. The metadata map mirrors the SPARQL JSON results structure and may contain the keys + * {@code value}, {@code type}, {@code datatype} and {@code lang} (the language tag, named {@code xml:lang} in the SPARQL JSON format itself). + * If {@code hashVariable} is set, variable names are hashed; if {@code onlyDistinct} is set, duplicate rows are removed. 
+     * @param query SPARQL query to be executed
+     * @return Result of the SPARQL query with per-binding metadata maps
+     */
+    public List<Map<String, Map<String, String>>> getDataframeWithMetadata(String query) {
+        List<Map<String, Value>> valueResults = executeQuery(query);
+        Collection<Map<String, Map<String, String>>> dataframe = onlyDistinct ? new LinkedHashSet<>() : new ArrayList<>();
+        for (Map<String, Value> row : valueResults) {
+            Map<String, Map<String, String>> bindingRow = new HashMap<>(row.size());
+            for (var rowEntry : row.entrySet()) {
+                String sparqlBindingKey = hashVariable
+                        ? TemplateFunctions.literalHash(rowEntry.getKey())
+                        : rowEntry.getKey();
+                bindingRow.put(sparqlBindingKey, bindingMetadata(rowEntry.getValue()));
+            }
+            dataframe.add(bindingRow);
+        }
+        return new ArrayList<>(dataframe);
+    }
+
+    /**
+     * Builds the metadata map for a single binding {@code value}, mirroring the SPARQL JSON results
+     * structure. For a missing (unbound) value an empty map is returned. Otherwise, it contains {@code value},
+     * {@code type} and, for literals, {@code datatype} and an optional {@code lang}.
+     * @param value binding value, may be {@code null}
+     * @return metadata map for the binding, empty if {@code value} is {@code null}
+     */
+    private Map<String, String> bindingMetadata(Value value) {
+        Map<String, String> valueMap = new HashMap<>();
+        if (value == null)
+            return valueMap;
+        valueMap.put("value", value.stringValue());
+        if (value instanceof IRI) {
+            valueMap.put("type", "uri");
+        } else if (value instanceof BNode) {
+            valueMap.put("type", "bnode");
+        } else if (value instanceof Literal) {
+            valueMap.put("type", "literal");
+            Literal literal = (Literal) value;
+            literal.getLanguage().ifPresent(lang -> valueMap.put("lang", lang));
+            if (literal.getDatatype() != null)
+                valueMap.put("datatype", literal.getDatatype().stringValue());
+        }
+        return valueMap;
+    }
+
+
+
     /**
      * Executes a SPARQL query returning a list of rows as {@code List>}
      * and logging ({@code INFO} level) the query, the duration and the number of rows returned.
diff --git a/src/test/java/com/cefriel/template/RDFReaderTests.java b/src/test/java/com/cefriel/template/RDFReaderTests.java index 8a9ab5c..6944700 100644 --- a/src/test/java/com/cefriel/template/RDFReaderTests.java +++ b/src/test/java/com/cefriel/template/RDFReaderTests.java @@ -18,17 +18,25 @@ import com.cefriel.template.io.rdf.RDFReader; import com.cefriel.template.utils.TemplateFunctions; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.impl.TreeModel; +import org.eclipse.rdf4j.model.util.Models; import org.eclipse.rdf4j.repository.Repository; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.RDFParser; +import org.eclipse.rdf4j.rio.Rio; +import org.eclipse.rdf4j.rio.helpers.StatementCollector; import org.eclipse.rdf4j.sail.memory.MemoryStore; import org.junit.jupiter.api.Test; import java.io.FileInputStream; import java.io.InputStream; +import java.io.StringReader; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.List; import java.util.Map; public class RDFReaderTests { @@ -117,4 +125,94 @@ public void agencyParametricStream() throws Exception { } // TODO Add tests for TemplateMap // TODO Add tests for XMLReader + + @Test + public void dataframeWithMetadata() throws Exception { + Repository repo = new SailRepository(new MemoryStore()); + RDFReader reader = new RDFReader(repo); + reader.setBaseIRI("http://www.cefriel.com/data/"); + reader.addFile(resolvePath("agency", "input.ttl"), RDFFormat.TURTLE); + + String query = "PREFIX gtfs: " + + "PREFIX foaf: " + + "PREFIX dct: " + + "SELECT ?s ?name ?lang WHERE { " + + " ?s a gtfs:Agency ; foaf:name ?name ; dct:language ?lang . 
}"; + + List>> rows = reader.getDataframeWithMetadata(query); + + assert rows.size() == 2; + for (Map> row : rows) { + Map s = row.get("s"); + assert "uri".equals(s.get("type")); + assert s.get("value").startsWith("http://sprint-transport.eu/data/agencies/"); + assert s.get("datatype") == null; + + Map name = row.get("name"); + assert "literal".equals(name.get("type")); + assert name.get("value").endsWith("Agency"); + assert "http://www.w3.org/2001/XMLSchema#string".equals(name.get("datatype")); + + Map lang = row.get("lang"); + assert "en".equals(lang.get("value")); + } + + reader.addString( + "@prefix base: ." + + "@prefix xsd: ." + + "base:event base:start \"2026-06-29T10:30:00Z\"^^xsd:dateTime ; " + + "base:count \"42\"^^xsd:integer ; " + + "base:label \"ciao\"@it .", + RDFFormat.TURTLE); + + String typedQuery = "PREFIX base: " + + "SELECT ?start ?count ?label WHERE { " + + " base:event base:start ?start ; base:count ?count ; base:label ?label . }"; + List>> typedRows = reader.getDataframeWithMetadata(typedQuery); + + assert typedRows.size() == 1; + Map> typedRow = typedRows.get(0); + + Map start = typedRow.get("start"); + assert "literal".equals(start.get("type")); + assert "2026-06-29T10:30:00Z".equals(start.get("value")); + assert "http://www.w3.org/2001/XMLSchema#dateTime".equals(start.get("datatype")); + + Map count = typedRow.get("count"); + assert "42".equals(count.get("value")); + assert "http://www.w3.org/2001/XMLSchema#integer".equals(count.get("datatype")); + + Map label = typedRow.get("label"); + assert "ciao".equals(label.get("value")); + assert "it".equals(label.get("lang")); + + reader.shutDown(); + } + + @Test + public void dataframeWithMetadataMapping() throws Exception { + Repository repo = new SailRepository(new MemoryStore()); + RDFReader reader = new RDFReader(repo); + reader.setBaseIRI("http://www.cefriel.com/data/"); + String folder = "metadata"; + reader.addFile(resolvePath(folder, "input.ttl"), RDFFormat.TURTLE); + + TemplateExecutor 
executor = new TemplateExecutor(true, false, false, null); + Path template = Paths.get(resolvePath(folder, "template.vm")); + String result = executor.executeMapping(Map.of("reader", reader), template, new TemplateFunctions(), null); + + String expectedOutput = Files.readString(Paths.get(resolvePath(folder, "output.ttl"))); + Model resultModel = parseTurtle(result); + Model expectedModel = parseTurtle(expectedOutput); + assert Models.isomorphic(resultModel, expectedModel); + reader.shutDown(); + } + + private static Model parseTurtle(String rdf) throws Exception { + RDFParser parser = Rio.createParser(RDFFormat.TURTLE); + Model model = new TreeModel(); + parser.setRDFHandler(new StatementCollector(model)); + parser.parse(new StringReader(rdf), "http://example.com/base/"); + return model; + } } diff --git a/src/test/resources/metadata/input.ttl b/src/test/resources/metadata/input.ttl new file mode 100644 index 0000000..63c9ce0 --- /dev/null +++ b/src/test/resources/metadata/input.ttl @@ -0,0 +1,15 @@ +@prefix base: . +@prefix foaf: . +@prefix gtfs: . +@prefix dct: . +@prefix rdfs: . + + a gtfs:Agency; + dct:language "en"; + foaf:name "Best Agency" ; + rdfs:label "Migliore"@it . + + a gtfs:Agency; + dct:language "en"; + foaf:name "Wow Agency" ; + rdfs:label "Stupenda"@it . diff --git a/src/test/resources/metadata/output.ttl b/src/test/resources/metadata/output.ttl new file mode 100644 index 0000000..11351c2 --- /dev/null +++ b/src/test/resources/metadata/output.ttl @@ -0,0 +1,15 @@ +@prefix ex: . + + + ex:name "Best Agency" ; + ex:nameType "literal" ; + ex:nameDatatype "http://www.w3.org/2001/XMLSchema#string" ; + ex:label "Migliore" ; + ex:labelLang "it" . + + + ex:name "Wow Agency" ; + ex:nameType "literal" ; + ex:nameDatatype "http://www.w3.org/2001/XMLSchema#string" ; + ex:label "Stupenda" ; + ex:labelLang "it" . 
diff --git a/src/test/resources/metadata/template.vm b/src/test/resources/metadata/template.vm new file mode 100644 index 0000000..2a06550 --- /dev/null +++ b/src/test/resources/metadata/template.vm @@ -0,0 +1,10 @@ +#set ( $h = $reader.setQueryHeader(" + PREFIX foaf: + PREFIX gtfs: + PREFIX rdfs: + PREFIX dct: +") ) +#set ( $rows = $reader.getDataframeWithMetadata("SELECT ?s ?name ?label WHERE { ?s a gtfs:Agency ; foaf:name ?name ; rdfs:label ?label . }") ) +@prefix ex: . +#foreach($row in $rows)<$row.s.value> ex:name "$row.name.value" ; ex:nameType "$row.name.type" ; ex:nameDatatype "$row.name.datatype" ; ex:label "$row.label.value" ; ex:labelLang "$row.label.lang" . +#end