diff --git a/src/main/java/com/cefriel/template/io/rdf/RDFReader.java b/src/main/java/com/cefriel/template/io/rdf/RDFReader.java index 4f2f35b..820ff75 100644 --- a/src/main/java/com/cefriel/template/io/rdf/RDFReader.java +++ b/src/main/java/com/cefriel/template/io/rdf/RDFReader.java @@ -170,6 +170,59 @@ private List> getQueryResultsStringValue(String query) { return new ArrayList<>(dataframe); } + /** + * Executes a SPARQL query returning a list of rows as {@code List>>}. + * Each element of the list is a row mapping every variable name to a metadata map describing the + * bound value. The metadata map mirrors the SPARQL JSON results structure and may contain the keys + * {@code value}, {@code type} , {@code datatype} and {@code lang}. + * If {@code hashVariable} is set, variable names are hashed; if {@code onlyDistinct} is set, duplicate rows are removed. + * @param query SPARQL query to be executed + * @return Result of the SPARQL query with per-binding metadata maps + */ + public List>> getDataframeWithMetadata(String query) { + List> valueResults = executeQuery(query); + Collection>> dataframe = onlyDistinct ? new LinkedHashSet<>() : new ArrayList<>(); + for (Map row : valueResults) { + Map> bindingRow = new HashMap<>(row.size()); + for (var rowEntry : row.entrySet()) { + String sparqlBindingKey = hashVariable + ? TemplateFunctions.literalHash(rowEntry.getKey()) + : rowEntry.getKey(); + bindingRow.put(sparqlBindingKey, bindingMetadata(rowEntry.getValue())); + } + dataframe.add(bindingRow); + } + return new ArrayList<>(dataframe); + } + + /** + * Builds the metadata map for a single binding {@code value}, mirroring the SPARQL JSON results + * structure. For a missing (unbound) value an empty map is returned. Otherwise, it contains {@code value}, + * {@code type} and, for literals, {@code datatype} and an optional {@code lang}. + * @param value binding value, may be {@code null} + * @return metadata map for the binding, empty if {@code value} is {@code null} + */ + private Map bindingMetadata(Value value) { + Map valueMap = new HashMap<>(); + if (value == null) + return valueMap; + valueMap.put("value", value.stringValue()); + if (value instanceof IRI) { + valueMap.put("type", "uri"); + } else if (value instanceof BNode) { + valueMap.put("type", "bnode"); + } else if (value instanceof Literal) { + valueMap.put("type", "literal"); + Literal literal = (Literal) value; + literal.getLanguage().ifPresent(lang -> valueMap.put("lang", lang)); + if (literal.getDatatype() != null) + valueMap.put("datatype", literal.getDatatype().stringValue()); + } + return valueMap; + } + + + /** * Executes a SPARQL query returning a list of rows as {@code List>} * and logging ({@code INFO} level) the query, the duration and the number of rows returned. diff --git a/src/test/java/com/cefriel/template/RDFReaderTests.java b/src/test/java/com/cefriel/template/RDFReaderTests.java index 8a9ab5c..6944700 100644 --- a/src/test/java/com/cefriel/template/RDFReaderTests.java +++ b/src/test/java/com/cefriel/template/RDFReaderTests.java @@ -18,17 +18,25 @@ import com.cefriel.template.io.rdf.RDFReader; import com.cefriel.template.utils.TemplateFunctions; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.impl.TreeModel; +import org.eclipse.rdf4j.model.util.Models; import org.eclipse.rdf4j.repository.Repository; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.RDFParser; +import org.eclipse.rdf4j.rio.Rio; +import org.eclipse.rdf4j.rio.helpers.StatementCollector; import org.eclipse.rdf4j.sail.memory.MemoryStore; import org.junit.jupiter.api.Test; import java.io.FileInputStream; import java.io.InputStream; +import java.io.StringReader; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.List; import java.util.Map; public class RDFReaderTests { @@ -117,4 +125,94 @@ public void agencyParametricStream() throws Exception { } // TODO Add tests for TemplateMap // TODO Add tests for XMLReader + + @Test + public void dataframeWithMetadata() throws Exception { + Repository repo = new SailRepository(new MemoryStore()); + RDFReader reader = new RDFReader(repo); + reader.setBaseIRI("http://www.cefriel.com/data/"); + reader.addFile(resolvePath("agency", "input.ttl"), RDFFormat.TURTLE); + + String query = "PREFIX gtfs: " + + "PREFIX foaf: " + + "PREFIX dct: " + + "SELECT ?s ?name ?lang WHERE { " + + " ?s a gtfs:Agency ; foaf:name ?name ; dct:language ?lang . }"; + + List>> rows = reader.getDataframeWithMetadata(query); + + assert rows.size() == 2; + for (Map> row : rows) { + Map s = row.get("s"); + assert "uri".equals(s.get("type")); + assert s.get("value").startsWith("http://sprint-transport.eu/data/agencies/"); + assert s.get("datatype") == null; + + Map name = row.get("name"); + assert "literal".equals(name.get("type")); + assert name.get("value").endsWith("Agency"); + assert "http://www.w3.org/2001/XMLSchema#string".equals(name.get("datatype")); + + Map lang = row.get("lang"); + assert "en".equals(lang.get("value")); + } + + reader.addString( + "@prefix base: ." + + "@prefix xsd: ." + + "base:event base:start \"2026-06-29T10:30:00Z\"^^xsd:dateTime ; " + + "base:count \"42\"^^xsd:integer ; " + + "base:label \"ciao\"@it .", + RDFFormat.TURTLE); + + String typedQuery = "PREFIX base: " + + "SELECT ?start ?count ?label WHERE { " + + " base:event base:start ?start ; base:count ?count ; base:label ?label . }"; + List>> typedRows = reader.getDataframeWithMetadata(typedQuery); + + assert typedRows.size() == 1; + Map> typedRow = typedRows.get(0); + + Map start = typedRow.get("start"); + assert "literal".equals(start.get("type")); + assert "2026-06-29T10:30:00Z".equals(start.get("value")); + assert "http://www.w3.org/2001/XMLSchema#dateTime".equals(start.get("datatype")); + + Map count = typedRow.get("count"); + assert "42".equals(count.get("value")); + assert "http://www.w3.org/2001/XMLSchema#integer".equals(count.get("datatype")); + + Map label = typedRow.get("label"); + assert "ciao".equals(label.get("value")); + assert "it".equals(label.get("lang")); + + reader.shutDown(); + } + + @Test + public void dataframeWithMetadataMapping() throws Exception { + Repository repo = new SailRepository(new MemoryStore()); + RDFReader reader = new RDFReader(repo); + reader.setBaseIRI("http://www.cefriel.com/data/"); + String folder = "metadata"; + reader.addFile(resolvePath(folder, "input.ttl"), RDFFormat.TURTLE); + + TemplateExecutor executor = new TemplateExecutor(true, false, false, null); + Path template = Paths.get(resolvePath(folder, "template.vm")); + String result = executor.executeMapping(Map.of("reader", reader), template, new TemplateFunctions(), null); + + String expectedOutput = Files.readString(Paths.get(resolvePath(folder, "output.ttl"))); + Model resultModel = parseTurtle(result); + Model expectedModel = parseTurtle(expectedOutput); + assert Models.isomorphic(resultModel, expectedModel); + reader.shutDown(); + } + + private static Model parseTurtle(String rdf) throws Exception { + RDFParser parser = Rio.createParser(RDFFormat.TURTLE); + Model model = new TreeModel(); + parser.setRDFHandler(new StatementCollector(model)); + parser.parse(new StringReader(rdf), "http://example.com/base/"); + return model; + } } diff --git a/src/test/resources/metadata/input.ttl b/src/test/resources/metadata/input.ttl new file mode 100644 index 0000000..63c9ce0 --- /dev/null +++ b/src/test/resources/metadata/input.ttl @@ -0,0 +1,15 @@ +@prefix base: . +@prefix foaf: . +@prefix gtfs: . +@prefix dct: . +@prefix rdfs: . + + a gtfs:Agency; + dct:language "en"; + foaf:name "Best Agency" ; + rdfs:label "Migliore"@it . + + a gtfs:Agency; + dct:language "en"; + foaf:name "Wow Agency" ; + rdfs:label "Stupenda"@it . diff --git a/src/test/resources/metadata/output.ttl b/src/test/resources/metadata/output.ttl new file mode 100644 index 0000000..11351c2 --- /dev/null +++ b/src/test/resources/metadata/output.ttl @@ -0,0 +1,15 @@ +@prefix ex: . + + + ex:name "Best Agency" ; + ex:nameType "literal" ; + ex:nameDatatype "http://www.w3.org/2001/XMLSchema#string" ; + ex:label "Migliore" ; + ex:labelLang "it" . + + + ex:name "Wow Agency" ; + ex:nameType "literal" ; + ex:nameDatatype "http://www.w3.org/2001/XMLSchema#string" ; + ex:label "Stupenda" ; + ex:labelLang "it" . diff --git a/src/test/resources/metadata/template.vm b/src/test/resources/metadata/template.vm new file mode 100644 index 0000000..2a06550 --- /dev/null +++ b/src/test/resources/metadata/template.vm @@ -0,0 +1,10 @@ +#set ( $h = $reader.setQueryHeader(" + PREFIX foaf: + PREFIX gtfs: + PREFIX rdfs: + PREFIX dct: +") ) +#set ( $rows = $reader.getDataframeWithMetadata("SELECT ?s ?name ?label WHERE { ?s a gtfs:Agency ; foaf:name ?name ; rdfs:label ?label . }") ) +@prefix ex: . +#foreach($row in $rows)<$row.s.value> ex:name "$row.name.value" ; ex:nameType "$row.name.type" ; ex:nameDatatype "$row.name.datatype" ; ex:label "$row.label.value" ; ex:labelLang "$row.label.lang" . +#end