Package net.sansa_stack.spark.rdd.op.rdf
Class JavaRddOfTriplesOps
java.lang.Object
net.sansa_stack.spark.rdd.op.rdf.JavaRddOfTriplesOps
-
Constructor Summary
-
Method Summary
Modifier and Type | Method | Description
static <K> org.apache.spark.api.java.JavaPairRDD<K,
org.apache.jena.rdf.model.Model> groupBy
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd, org.aksw.commons.lambda.serializable.SerializableFunction<? super org.apache.jena.graph.Triple, K> tripleToKey) static org.apache.spark.api.java.JavaPairRDD<org.apache.jena.graph.Node,
org.apache.jena.rdf.model.Model> groupByObjectNodes
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) static org.apache.spark.api.java.JavaPairRDD<String,
org.apache.jena.rdf.model.Model> groupByObjects
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) static org.apache.spark.api.java.JavaPairRDD<org.apache.jena.graph.Node,
org.apache.jena.rdf.model.Model> groupByPredicateNodes
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) This function mainly exists for completeness. static org.apache.spark.api.java.JavaPairRDD<String,
org.apache.jena.rdf.model.Model> groupByPredicates
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) This function mainly exists for completeness. static org.apache.spark.api.java.JavaPairRDD<org.apache.jena.graph.Node,
org.apache.jena.rdf.model.Model> groupBySubjectNodes
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) static org.apache.spark.api.java.JavaPairRDD<String,
org.apache.jena.rdf.model.Model> groupBySubjects
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) static <K> org.apache.spark.api.java.JavaPairRDD<K,
org.apache.jena.rdf.model.Model> groupTriplesIntoModels
(org.apache.spark.api.java.JavaPairRDD<K, org.apache.jena.graph.Triple> rdd) static JavaRddFunction<org.apache.jena.graph.Triple,
org.apache.jena.sparql.core.Quad> mapIntoGraph
(org.apache.jena.graph.Node graphNode) static org.apache.spark.api.java.JavaRDD<org.apache.jena.rdf.model.Model>
mapToModel
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) static org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple>
postProcess
(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd, boolean sort, boolean ascending, boolean distinct, int numPartitions) Sort triples by their string representation (relies on NodeFmtLib.str(org.apache.jena.graph.Triple))
static String
toGraphName
(org.apache.jena.graph.Node node) Map a node losslessly to an IRI suitable for use as a graph name. This is needed to e.g. group triples by objects into named graphs.
-
Constructor Details
-
JavaRddOfTriplesOps
public JavaRddOfTriplesOps()
-
-
Method Details
-
groupTriplesIntoModels
public static <K> org.apache.spark.api.java.JavaPairRDD<K,org.apache.jena.rdf.model.Model> groupTriplesIntoModels(org.apache.spark.api.java.JavaPairRDD<K, org.apache.jena.graph.Triple> rdd) -
groupBy
public static <K> org.apache.spark.api.java.JavaPairRDD<K,org.apache.jena.rdf.model.Model> groupBy(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd, org.aksw.commons.lambda.serializable.SerializableFunction<? super org.apache.jena.graph.Triple, K> tripleToKey) -
groupBySubjectNodes
public static org.apache.spark.api.java.JavaPairRDD<org.apache.jena.graph.Node,org.apache.jena.rdf.model.Model> groupBySubjectNodes(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) -
groupByObjectNodes
public static org.apache.spark.api.java.JavaPairRDD<org.apache.jena.graph.Node,org.apache.jena.rdf.model.Model> groupByObjectNodes(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) -
groupByPredicateNodes
public static org.apache.spark.api.java.JavaPairRDD<org.apache.jena.graph.Node,org.apache.jena.rdf.model.Model> groupByPredicateNodes(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) This function mainly exists for completeness -
groupBySubjects
public static org.apache.spark.api.java.JavaPairRDD<String,org.apache.jena.rdf.model.Model> groupBySubjects(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) -
groupByObjects
public static org.apache.spark.api.java.JavaPairRDD<String,org.apache.jena.rdf.model.Model> groupByObjects(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) -
groupByPredicates
public static org.apache.spark.api.java.JavaPairRDD<String,org.apache.jena.rdf.model.Model> groupByPredicates(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) This function mainly exists for completeness -
toGraphName
Map a node losslessly to an IRI suitable for use as a graph name. This is needed to e.g. group triples by objects into named graphs in order to use all the RDF machinery - named graphs need to be IRIs. This mapping is used in 'RddOfDatasetOps.naturalResource'. TODO: Add a reverse mapping. -
mapToModel
public static org.apache.spark.api.java.JavaRDD<org.apache.jena.rdf.model.Model> mapToModel(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd) -
mapIntoGraph
public static JavaRddFunction<org.apache.jena.graph.Triple,org.apache.jena.sparql.core.Quad> mapIntoGraph(org.apache.jena.graph.Node graphNode) -
postProcess
public static org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> postProcess(org.apache.spark.api.java.JavaRDD<org.apache.jena.graph.Triple> rdd, boolean sort, boolean ascending, boolean distinct, int numPartitions) Sort triples by their string representation (relies on NodeFmtLib.str(org.apache.jena.graph.Triple))
-