/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.rya.mongodb.aggregation;

import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.CONTEXT;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.DOCUMENT_VISIBILITY;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_HASH;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_LANGUAGE;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_TYPE;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.PREDICATE;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.PREDICATE_HASH;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.STATEMENT_METADATA;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.SUBJECT;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.SUBJECT_HASH;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.TIMESTAMP;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.function.Function;

import org.apache.rya.api.domain.RyaIRI;
import org.apache.rya.api.domain.RyaStatement;
import org.apache.rya.api.domain.RyaType;
import org.apache.rya.api.domain.StatementMetadata;
import org.apache.rya.api.resolver.RdfToRyaConversions;
import org.apache.rya.mongodb.MongoDbRdfConstants;
import org.apache.rya.mongodb.dao.MongoDBStorageStrategy;
import org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy;
import org.apache.rya.mongodb.document.operators.query.ConditionalOperators;
import org.apache.rya.mongodb.document.visibility.DocumentVisibilityAdapter;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.algebra.Compare;
import org.eclipse.rdf4j.query.algebra.ExtensionElem;
import org.eclipse.rdf4j.query.algebra.ProjectionElem;
import org.eclipse.rdf4j.query.algebra.ProjectionElemList;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.ValueConstant;
import org.eclipse.rdf4j.query.algebra.ValueExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.ExternalSet;

import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.BsonField;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Projections;

/**
 * Represents a portion of a query tree as a MongoDB aggregation pipeline.
 * Should be built bottom-up: start with a statement pattern implemented as a
 * $match step, then add steps to the pipeline to handle higher levels of the
 * query tree. Methods are provided to add certain supported query operations
 * to the end of the internal pipeline. In some cases, specific arguments may
 * be unsupported, in which case the pipeline is unchanged and the method
 * returns false.
 */
public class AggregationPipelineQueryNode extends ExternalSet {
    /**
     * An aggregation result corresponding to a solution should map this key
     * to an object which itself maps variable names to variable values.
     */
    static final String VALUES = "<VALUES>";

    /**
     * An aggregation result corresponding to a solution should map this key
     * to an object which itself maps variable names to the corresponding
     * hashes of their values.
     */
    static final String HASHES = "<HASHES>";

    /**
     * An aggregation result corresponding to a solution should map this key
     * to an object which itself maps variable names to their datatypes, if any.
     */
    static final String TYPES = "<TYPES>";

    private static final String LEVEL = "derivation_level";
    private static final String[] FIELDS = { VALUES, HASHES, TYPES, LEVEL, TIMESTAMP };

    private static final String JOINED_TRIPLE = "<JOINED_TRIPLE>";
    private static final String FIELDS_MATCH = "<JOIN_FIELDS_MATCH>";

    private static final MongoDBStorageStrategy<RyaStatement> strategy =
            new SimpleMongoDBStorageStrategy();

    private static final Bson DEFAULT_TYPE = new Document("$literal",
            XMLSchema.ANYURI.stringValue());
    private static final Bson DEFAULT_CONTEXT = new Document("$literal", "");
    private static final Bson DEFAULT_DV =
            DocumentVisibilityAdapter.toDBObject(MongoDbRdfConstants.EMPTY_DV);
    private static final Bson DEFAULT_METADATA = new Document("$literal",
            StatementMetadata.EMPTY_METADATA.toString());

    private static boolean isValidFieldName(final String name) {
        return !(name == null || name.contains(".")
                || name.contains("$") || name.equals("_id"));
    }
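
    /*
     * For orientation, a sketch of the intermediate document structure this
     * pipeline produces for a solution binding variables "x" and "y". All
     * values here are hypothetical, and the literal name of the timestamp
     * field comes from SimpleMongoDBStorageStrategy.TIMESTAMP:
     *
     *   {
     *     "<VALUES>": { "x": "urn:SomeSubject", "y": "42" },
     *     "<HASHES>": { "x": "<hash of x's value>", "y": "<hash of y's value>" },
     *     "<TYPES>":  { "y": "http://www.w3.org/2001/XMLSchema#integer" },
     *     "derivation_level": 0,
     *     <TIMESTAMP>: <long>
     *   }
     */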
    /**
     * For a given statement pattern, represents a mapping from query variables
     * to their corresponding parts of matching triples. If necessary, also
     * substitutes variable names containing invalid characters with temporary
     * replacements, while producing a map back to the original names.
     */
    private static class StatementVarMapping {
        private final Map<String, String> varToTripleValue = new HashMap<>();
        private final Map<String, String> varToTripleHash = new HashMap<>();
        private final Map<String, String> varToTripleType = new HashMap<>();
        private final BiMap<String, String> varToOriginalName;

        String valueField(final String varName) {
            return varToTripleValue.get(varName);
        }
        String hashField(final String varName) {
            return varToTripleHash.get(varName);
        }
        String typeField(final String varName) {
            return varToTripleType.get(varName);
        }

        Set<String> varNames() {
            return varToTripleValue.keySet();
        }

        private String replace(final String original) {
            if (varToOriginalName.containsValue(original)) {
                return varToOriginalName.inverse().get(original);
            } else {
                final String replacement = "field-" + UUID.randomUUID();
                varToOriginalName.put(replacement, original);
                return replacement;
            }
        }

        private String sanitize(final String name) {
            if (varToOriginalName.containsValue(name)) {
                return varToOriginalName.inverse().get(name);
            } else if (name != null && !isValidFieldName(name)) {
                return replace(name);
            }
            return name;
        }

        StatementVarMapping(final StatementPattern sp,
                final BiMap<String, String> varToOriginalName) {
            this.varToOriginalName = varToOriginalName;
            if (sp.getSubjectVar() != null && !sp.getSubjectVar().hasValue()) {
                final String name = sanitize(sp.getSubjectVar().getName());
                varToTripleValue.put(name, SUBJECT);
                varToTripleHash.put(name, SUBJECT_HASH);
            }
            if (sp.getPredicateVar() != null && !sp.getPredicateVar().hasValue()) {
                final String name = sanitize(sp.getPredicateVar().getName());
                varToTripleValue.put(name, PREDICATE);
                varToTripleHash.put(name, PREDICATE_HASH);
            }
            if (sp.getObjectVar() != null && !sp.getObjectVar().hasValue()) {
                final String name = sanitize(sp.getObjectVar().getName());
                varToTripleValue.put(name, OBJECT);
                varToTripleHash.put(name, OBJECT_HASH);
                varToTripleType.put(name, OBJECT_TYPE);
            }
            if (sp.getContextVar() != null && !sp.getContextVar().hasValue()) {
                final String name = sanitize(sp.getContextVar().getName());
                varToTripleValue.put(name, CONTEXT);
            }
        }

        Bson getProjectExpression() {
            return getProjectExpression(new LinkedList<>(), str -> "$" + str);
        }

        Bson getProjectExpression(final Iterable<String> alsoInclude,
                final Function<String, String> getFieldExpr) {
            final Document values = new Document();
            final Document hashes = new Document();
            final Document types = new Document();
            for (final String varName : varNames()) {
                values.append(varName, getFieldExpr.apply(valueField(varName)));
                if (varToTripleHash.containsKey(varName)) {
                    hashes.append(varName, getFieldExpr.apply(hashField(varName)));
                }
                if (varToTripleType.containsKey(varName)) {
                    types.append(varName, getFieldExpr.apply(typeField(varName)));
                }
            }
            for (final String varName : alsoInclude) {
                values.append(varName, 1);
                hashes.append(varName, 1);
                types.append(varName, 1);
            }
            final List<Bson> fields = new LinkedList<>();
            fields.add(Projections.excludeId());
            fields.add(Projections.computed(VALUES, values));
            fields.add(Projections.computed(HASHES, hashes));
            if (!types.isEmpty()) {
                fields.add(Projections.computed(TYPES, types));
            }
            fields.add(Projections.computed(LEVEL, new Document("$max",
                    Arrays.asList("$" + LEVEL, getFieldExpr.apply(LEVEL), 0))));
            fields.add(Projections.computed(TIMESTAMP, new Document("$max",
                    Arrays.asList("$" + TIMESTAMP, getFieldExpr.apply(TIMESTAMP), 0))));
            return Projections.fields(fields);
        }
    }
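
    /*
     * Illustrative example (hypothetical variable names): for the pattern
     * { ?x <urn:pred> ?y }, the mapping above associates "x" with the triple's
     * subject fields and "y" with its object fields, so getProjectExpression()
     * yields a $project stage roughly like:
     *
     *   { "_id": 0,
     *     "<VALUES>": { "x": <subject field expr>,      "y": <object field expr> },
     *     "<HASHES>": { "x": <subject hash field expr>, "y": <object hash field expr> },
     *     "<TYPES>":  { "y": <object type field expr> },
     *     "derivation_level": { "$max": [...] },
     *     <TIMESTAMP>: { "$max": [...] } }
     *
     * The literal triple field names come from SimpleMongoDBStorageStrategy
     * and are only sketched here.
     */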
* to a "$match" command that will find matching triples. * @param sp The StatementPattern to search for * @param path If given, specify the field that should be matched against * the statement pattern, using an ordered list of field names for a nested * field. E.g. to match records { "x": { "y": <statement pattern } }, pass * "x" followed by "y". * @return The argument of a "$match" query */ private static BasicDBObject getMatchExpression(final StatementPattern sp, final String... path) { final Var subjVar = sp.getSubjectVar(); final Var predVar = sp.getPredicateVar(); final Var objVar = sp.getObjectVar(); final Var contextVar = sp.getContextVar(); RyaIRI s = null; RyaIRI p = null; RyaType o = null; RyaIRI c = null; if (subjVar != null && subjVar.getValue() instanceof Resource) { s = RdfToRyaConversions.convertResource((Resource) subjVar.getValue()); } if (predVar != null && predVar.getValue() instanceof IRI) { p = RdfToRyaConversions.convertIRI((IRI) predVar.getValue()); } if (objVar != null && objVar.getValue() != null) { o = RdfToRyaConversions.convertValue(objVar.getValue()); } if (contextVar != null && contextVar.getValue() instanceof IRI) { c = RdfToRyaConversions.convertIRI((IRI) contextVar.getValue()); } final RyaStatement rs = new RyaStatement(s, p, o, c); final DBObject obj = strategy.getQuery(rs); // Add path prefix, if given if (path.length > 0) { final StringBuilder sb = new StringBuilder(); for (final String str : path) { sb.append(str).append("."); } final String prefix = sb.toString(); final Set<String> originalKeys = new HashSet<>(obj.keySet()); originalKeys.forEach(key -> { final Object value = obj.removeField(key); obj.put(prefix + key, value); }); } return (BasicDBObject) obj; } private static String valueFieldExpr(final String varName) { return "$" + VALUES + "." + varName; } private static String hashFieldExpr(final String varName) { return "$" + HASHES + "." + varName; } private static String typeFieldExpr(final String varName) { return "$" + TYPES + "." + varName; } private static String joinFieldExpr(final String triplePart) { return "$" + JOINED_TRIPLE + "." + triplePart; } /** * Get an object representing the value field of some value expression, or * return null if the expression isn't supported. */ private Object valueFieldExpr(final ValueExpr expr) { if (expr instanceof Var) { return valueFieldExpr(((Var) expr).getName()); } else if (expr instanceof ValueConstant) { return new Document("$literal", ((ValueConstant) expr).getValue().stringValue()); } else { return null; } } private final List<Bson> pipeline; private final MongoCollection<Document> collection; private final Set<String> assuredBindingNames; private final Set<String> bindingNames; private final BiMap<String, String> varToOriginalName; private String replace(final String original) { if (varToOriginalName.containsValue(original)) { return varToOriginalName.inverse().get(original); } else { final String replacement = "field-" + UUID.randomUUID(); varToOriginalName.put(replacement, original); return replacement; } } /** * Create a pipeline query node based on a StatementPattern. * @param collection The collection of triples to query. * @param baseSP The leaf node in the query tree. 
    /**
     * Create a pipeline query node based on a StatementPattern.
     * @param collection The collection of triples to query.
     * @param baseSP The leaf node in the query tree.
     */
    public AggregationPipelineQueryNode(final MongoCollection<Document> collection,
            final StatementPattern baseSP) {
        this.collection = Preconditions.checkNotNull(collection);
        Preconditions.checkNotNull(baseSP);
        this.varToOriginalName = HashBiMap.create();
        final StatementVarMapping mapping = new StatementVarMapping(baseSP, varToOriginalName);
        this.assuredBindingNames = new HashSet<>(mapping.varNames());
        this.bindingNames = new HashSet<>(mapping.varNames());
        this.pipeline = new LinkedList<>();
        this.pipeline.add(Aggregates.match(getMatchExpression(baseSP)));
        this.pipeline.add(Aggregates.project(mapping.getProjectExpression()));
    }

    AggregationPipelineQueryNode(final MongoCollection<Document> collection,
            final List<Bson> pipeline,
            final Set<String> assuredBindingNames,
            final Set<String> bindingNames,
            final BiMap<String, String> varToOriginalName) {
        this.collection = Preconditions.checkNotNull(collection);
        this.pipeline = Preconditions.checkNotNull(pipeline);
        this.assuredBindingNames = Preconditions.checkNotNull(assuredBindingNames);
        this.bindingNames = Preconditions.checkNotNull(bindingNames);
        this.varToOriginalName = Preconditions.checkNotNull(varToOriginalName);
    }
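
    /*
     * After the public constructor runs, the pipeline holds two stages,
     * sketched loosely here:
     *
     *   { "$match":   { <fields fixed by the statement pattern's constants> } },
     *   { "$project": { "<VALUES>": {...}, "<HASHES>": {...}, ... } }
     *
     * Every later operation (joinWith, project, extend, filter, distinct)
     * appends stages to this list rather than rewriting earlier ones.
     */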
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (o instanceof AggregationPipelineQueryNode) {
            final AggregationPipelineQueryNode other = (AggregationPipelineQueryNode) o;
            if (this.collection.equals(other.collection)
                    && this.assuredBindingNames.equals(other.assuredBindingNames)
                    && this.bindingNames.equals(other.bindingNames)
                    && this.varToOriginalName.equals(other.varToOriginalName)
                    && this.pipeline.size() == other.pipeline.size()) {
                // Check pipeline steps for equality -- underlying types don't
                // have well-behaved equals methods, so check for equivalent
                // string representations.
                for (int i = 0; i < this.pipeline.size(); i++) {
                    final Bson doc1 = this.pipeline.get(i);
                    final Bson doc2 = other.pipeline.get(i);
                    if (!doc1.toString().equals(doc2.toString())) {
                        return false;
                    }
                }
                return true;
            }
        }
        return false;
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(collection, pipeline,
                assuredBindingNames, bindingNames, varToOriginalName);
    }

    @Override
    public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(final BindingSet bindings)
            throws QueryEvaluationException {
        return new PipelineResultIteration(collection.aggregate(pipeline),
                varToOriginalName, bindings);
    }

    @Override
    public Set<String> getAssuredBindingNames() {
        final Set<String> names = new HashSet<>();
        for (final String name : assuredBindingNames) {
            names.add(varToOriginalName.getOrDefault(name, name));
        }
        return names;
    }

    @Override
    public Set<String> getBindingNames() {
        final Set<String> names = new HashSet<>();
        for (final String name : bindingNames) {
            names.add(varToOriginalName.getOrDefault(name, name));
        }
        return names;
    }

    @Override
    public AggregationPipelineQueryNode clone() {
        return new AggregationPipelineQueryNode(collection,
                new LinkedList<>(pipeline),
                new HashSet<>(assuredBindingNames),
                new HashSet<>(bindingNames),
                HashBiMap.create(varToOriginalName));
    }

    @Override
    public String getSignature() {
        super.getSignature();
        final Set<String> assured = getAssuredBindingNames();
        final Set<String> any = getBindingNames();
        final StringBuilder sb = new StringBuilder("AggregationPipelineQueryNode (binds: ");
        sb.append(String.join(", ", assured));
        if (any.size() > assured.size()) {
            final Set<String> optionalBindingNames = any;
            optionalBindingNames.removeAll(assured);
            sb.append(" [").append(String.join(", ", optionalBindingNames)).append("]");
        }
        sb.append(")\n");
        for (final Bson doc : pipeline) {
            sb.append(doc.toString()).append("\n");
        }
        return sb.toString();
    }

    /**
     * Get the internal list of aggregation pipeline steps. Note that documents
     * resulting from this pipeline will be structured using an internal
     * intermediate representation. For documents representing triples, see
     * {@link #getTriplePipeline}, and for query solutions, see
     * {@link #evaluate}.
     * @return The current internal pipeline.
     */
    List<Bson> getPipeline() {
        return pipeline;
    }
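
    /*
     * A minimal usage sketch (hypothetical; not referenced elsewhere in this
     * class): build a node for one statement pattern, join it with a second
     * pattern sharing the variable "b", and return the resulting stages. The
     * variable names are examples only.
     */
    private static List<Bson> exampleJoinPipeline(final MongoCollection<Document> triples) {
        // { ?a ?rel ?b }
        final StatementPattern first = new StatementPattern(
                new Var("a"), new Var("rel"), new Var("b"));
        // { ?b ?rel2 ?c } -- shares "b" with the first pattern
        final StatementPattern second = new StatementPattern(
                new Var("b"), new Var("rel2"), new Var("c"));
        final AggregationPipelineQueryNode node =
                new AggregationPipelineQueryNode(triples, first);
        // joinWith returns false if it cannot express the join as pipeline
        // stages; a caller would then fall back to another evaluation strategy.
        if (!node.joinWith(second)) {
            return null;
        }
        return node.getPipeline();
    }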
    /**
     * Add a join with an individual {@link StatementPattern} to the pipeline.
     * @param sp The statement pattern to join with
     * @return true if the join was successfully added to the pipeline.
     */
    public boolean joinWith(final StatementPattern sp) {
        Preconditions.checkNotNull(sp);
        // 1. Determine shared variables and new variables
        final StatementVarMapping spMap = new StatementVarMapping(sp, varToOriginalName);
        final NavigableSet<String> sharedVars = new ConcurrentSkipListSet<>(spMap.varNames());
        sharedVars.retainAll(assuredBindingNames);
        // 2. Join on one shared variable
        final String joinKey = sharedVars.pollFirst();
        final String collectionName = collection.getNamespace().getCollectionName();
        Bson join;
        if (joinKey == null) {
            return false;
        } else {
            join = Aggregates.lookup(collectionName,
                    HASHES + "." + joinKey,
                    spMap.hashField(joinKey),
                    JOINED_TRIPLE);
        }
        pipeline.add(join);
        // 3. Unwind the joined triples so each document represents a binding
        // set (solution) from the base branch and a triple that may match.
        pipeline.add(Aggregates.unwind("$" + JOINED_TRIPLE));
        // 4. (Optional) If there are any shared variables that weren't used as
        // the join key, project all existing fields plus a new field that
        // tests the equality of those shared variables.
        final BasicDBObject matchOpts = getMatchExpression(sp, JOINED_TRIPLE);
        if (!sharedVars.isEmpty()) {
            final List<Bson> eqTests = new LinkedList<>();
            for (final String varName : sharedVars) {
                final String oldField = valueFieldExpr(varName);
                final String newField = joinFieldExpr(spMap.valueField(varName));
                final Bson eqTest = new Document("$eq", Arrays.asList(oldField, newField));
                eqTests.add(eqTest);
            }
            final Bson eqProjectOpts = Projections.fields(
                    Projections.computed(FIELDS_MATCH, Filters.and(eqTests)),
                    Projections.include(JOINED_TRIPLE, VALUES, HASHES, TYPES, LEVEL, TIMESTAMP));
            pipeline.add(Aggregates.project(eqProjectOpts));
            matchOpts.put(FIELDS_MATCH, true);
        }
        // 5. Filter for solutions whose triples match the joined statement
        // pattern, and, if applicable, whose additional shared variables
        // match the current solution.
        pipeline.add(Aggregates.match(matchOpts));
        // 6. Project the results to include variables from the new SP (with
        // appropriate renaming) and variables referenced only in the base
        // pipeline (with previous names).
        final Bson finalProjectOpts = new StatementVarMapping(sp, varToOriginalName)
                .getProjectExpression(assuredBindingNames, str -> joinFieldExpr(str));
        assuredBindingNames.addAll(spMap.varNames());
        bindingNames.addAll(spMap.varNames());
        pipeline.add(Aggregates.project(finalProjectOpts));
        return true;
    }
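
    /*
     * Rendered loosely, the stages appended by a successful joinWith on shared
     * variable "b" look like the following (names shortened, values
     * hypothetical):
     *
     *   { "$lookup": { "from": <collection>, "localField": "<HASHES>.b",
     *                  "foreignField": <hash field for "b">, "as": "<JOINED_TRIPLE>" } },
     *   { "$unwind": "$<JOINED_TRIPLE>" },
     *   { "$match":  { <joined triple matches the pattern's constants> } },
     *   { "$project": { <merged VALUES/HASHES/TYPES for old and new variables> } }
     *
     * The equality-test $project from step 4 appears only when the patterns
     * share more variables than the one used as the join key.
     */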
    /**
     * Add a SPARQL projection or multi-projection operation to the pipeline.
     * The number of documents produced by the pipeline after this operation
     * will be the number of documents entering this stage (the number of
     * intermediate results) multiplied by the number of
     * {@link ProjectionElemList}s supplied here. Empty projections are
     * unsupported; if any projection given binds zero variables, then the
     * pipeline will be unchanged and the method will return false.
     * @param projections One or more projections, i.e. mappings from the
     *  result at this stage of the query into a set of variables.
     * @return true if the projection(s) were added to the pipeline.
     */
    public boolean project(final Iterable<ProjectionElemList> projections) {
        if (projections == null || !projections.iterator().hasNext()) {
            return false;
        }
        final List<Bson> projectOpts = new LinkedList<>();
        final Set<String> bindingNamesUnion = new HashSet<>();
        Set<String> bindingNamesIntersection = null;
        for (final ProjectionElemList projection : projections) {
            if (projection.getElements().isEmpty()) {
                // Empty projections are unsupported -- fail when seen
                return false;
            }
            final Document valueDoc = new Document();
            final Document hashDoc = new Document();
            final Document typeDoc = new Document();
            final Set<String> projectionBindingNames = new HashSet<>();
            for (final ProjectionElem elem : projection.getElements()) {
                String to = elem.getTargetName();
                // If the 'to' name is invalid, replace it internally
                if (!isValidFieldName(to)) {
                    to = replace(to);
                }
                String from = elem.getSourceName();
                // If the 'from' name is invalid, use the internal substitute
                if (varToOriginalName.containsValue(from)) {
                    from = varToOriginalName.inverse().get(from);
                }
                projectionBindingNames.add(to);
                if (to.equals(from)) {
                    valueDoc.append(to, 1);
                    hashDoc.append(to, 1);
                    typeDoc.append(to, 1);
                } else {
                    valueDoc.append(to, valueFieldExpr(from));
                    hashDoc.append(to, hashFieldExpr(from));
                    typeDoc.append(to, typeFieldExpr(from));
                }
            }
            bindingNamesUnion.addAll(projectionBindingNames);
            if (bindingNamesIntersection == null) {
                bindingNamesIntersection = new HashSet<>(projectionBindingNames);
            } else {
                bindingNamesIntersection.retainAll(projectionBindingNames);
            }
            projectOpts.add(new Document()
                    .append(VALUES, valueDoc)
                    .append(HASHES, hashDoc)
                    .append(TYPES, typeDoc)
                    .append(LEVEL, "$" + LEVEL)
                    .append(TIMESTAMP, "$" + TIMESTAMP));
        }
        if (projectOpts.size() == 1) {
            pipeline.add(Aggregates.project(projectOpts.get(0)));
        } else {
            final String listKey = "PROJECTIONS";
            final Bson projectIndividual = Projections.fields(
                    Projections.computed(VALUES, "$" + listKey + "." + VALUES),
                    Projections.computed(HASHES, "$" + listKey + "." + HASHES),
                    Projections.computed(TYPES, "$" + listKey + "." + TYPES),
                    Projections.include(LEVEL),
                    Projections.include(TIMESTAMP));
            pipeline.add(Aggregates.project(Projections.computed(listKey, projectOpts)));
            pipeline.add(Aggregates.unwind("$" + listKey));
            pipeline.add(Aggregates.project(projectIndividual));
        }
        assuredBindingNames.clear();
        bindingNames.clear();
        assuredBindingNames.addAll(bindingNamesIntersection);
        bindingNames.addAll(bindingNamesUnion);
        return true;
    }
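
    /*
     * Multi-projection sketch (hypothetical): given two ProjectionElemLists,
     * each input document is first projected into a two-element array under
     * "PROJECTIONS", then $unwind duplicates the document once per array
     * element. So a query emitting two templates per solution (e.g. a
     * CONSTRUCT with two triple patterns) yields two result documents per
     * intermediate result, as the javadoc above describes.
     */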
    /**
     * Add a SPARQL extension to the pipeline, if possible. An extension adds
     * some number of variables to the result. Adds a "$project" step to the
     * pipeline, but differs from the SPARQL project operation in that
     * 1) pre-existing variables are always kept, and 2) values of new variables
     * are defined by expressions, which may be more complex than simply
     * variable names. Not all expressions are supported. If unsupported
     * expression types are used in the extension, the pipeline will remain
     * unchanged and this method will return false.
     * @param extensionElements A list of new variables and their expressions
     * @return True if the extension was successfully converted into a pipeline
     *  step, false otherwise.
     */
    public boolean extend(final Iterable<ExtensionElem> extensionElements) {
        final List<Bson> valueFields = new LinkedList<>();
        final List<Bson> hashFields = new LinkedList<>();
        final List<Bson> typeFields = new LinkedList<>();
        for (final String varName : bindingNames) {
            valueFields.add(Projections.include(varName));
            hashFields.add(Projections.include(varName));
            typeFields.add(Projections.include(varName));
        }
        final Set<String> newVarNames = new HashSet<>();
        for (final ExtensionElem elem : extensionElements) {
            String name = elem.getName();
            if (!isValidFieldName(name)) {
                // If the field name is invalid, replace it internally
                name = replace(name);
            }
            // We can only handle certain kinds of value expressions; return
            // failure for any others.
            final ValueExpr expr = elem.getExpr();
            final Object valueField;
            final Object hashField;
            final Object typeField;
            if (expr instanceof Var) {
                final String varName = ((Var) expr).getName();
                valueField = "$" + varName;
                hashField = "$" + varName;
                typeField = "$" + varName;
            } else if (expr instanceof ValueConstant) {
                final Value val = ((ValueConstant) expr).getValue();
                valueField = new Document("$literal", val.stringValue());
                hashField = new Document("$literal",
                        SimpleMongoDBStorageStrategy.hash(val.stringValue()));
                if (val instanceof Literal) {
                    typeField = new Document("$literal",
                            ((Literal) val).getDatatype().stringValue());
                } else {
                    typeField = null;
                }
            } else {
                // if not understood, return failure
                return false;
            }
            valueFields.add(Projections.computed(name, valueField));
            hashFields.add(Projections.computed(name, hashField));
            if (typeField != null) {
                typeFields.add(Projections.computed(name, typeField));
            }
            newVarNames.add(name);
        }
        assuredBindingNames.addAll(newVarNames);
        bindingNames.addAll(newVarNames);
        final Bson projectOpts = Projections.fields(
                Projections.computed(VALUES, Projections.fields(valueFields)),
                Projections.computed(HASHES, Projections.fields(hashFields)),
                Projections.computed(TYPES, Projections.fields(typeFields)),
                Projections.include(LEVEL),
                Projections.include(TIMESTAMP));
        pipeline.add(Aggregates.project(projectOpts));
        return true;
    }
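
    /*
     * Extension example (hypothetical): the algebra for
     * BIND ("urn:thing" AS ?z) arrives as an ExtensionElem whose expression is
     * a ValueConstant, so ?z would be added to each document as
     * { "$literal": "urn:thing" } under <VALUES>, with its hash precomputed
     * under <HASHES>. Any expression type other than Var or ValueConstant
     * causes this method to decline and return false, as noted above.
     */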
    /**
     * Add a SPARQL filter to the pipeline, if possible. A filter eliminates
     * results that don't satisfy a given condition. Not all conditional
     * expressions are supported. If unsupported expressions are used in the
     * filter, the pipeline will remain unchanged and this method will return
     * false. Currently only supports binary {@link Compare} conditions among
     * variables and/or literals.
     * @param condition The filter condition
     * @return True if the filter was successfully converted into a pipeline
     *  step, false otherwise.
     */
    public boolean filter(final ValueExpr condition) {
        if (condition instanceof Compare) {
            final Compare compare = (Compare) condition;
            final Compare.CompareOp operator = compare.getOperator();
            final Object leftArg = valueFieldExpr(compare.getLeftArg());
            final Object rightArg = valueFieldExpr(compare.getRightArg());
            if (leftArg == null || rightArg == null) {
                // unsupported value expression, can't convert filter
                return false;
            }
            final String opFunc;
            switch (operator) {
            case EQ:
                opFunc = "$eq";
                break;
            case NE:
                opFunc = "$ne";
                break;
            case LT:
                opFunc = "$lt";
                break;
            case LE:
                opFunc = "$lte";
                break;
            case GT:
                opFunc = "$gt";
                break;
            case GE:
                opFunc = "$gte";
                break;
            default:
                // unrecognized comparison operator, can't convert filter
                return false;
            }
            final Document compareDoc = new Document(opFunc, Arrays.asList(leftArg, rightArg));
            pipeline.add(Aggregates.project(Projections.fields(
                    Projections.computed("FILTER", compareDoc),
                    Projections.include(VALUES, HASHES, TYPES, LEVEL, TIMESTAMP))));
            pipeline.add(Aggregates.match(new Document("FILTER", true)));
            pipeline.add(Aggregates.project(Projections.fields(
                    Projections.include(VALUES, HASHES, TYPES, LEVEL, TIMESTAMP))));
            return true;
        }
        return false;
    }

    /**
     * Add a $group step to filter out redundant solutions.
     * @return True if the distinct operation was successfully appended.
     */
    public boolean distinct() {
        final List<String> key = new LinkedList<>();
        for (final String varName : bindingNames) {
            key.add(hashFieldExpr(varName));
        }
        final List<BsonField> reduceOps = new LinkedList<>();
        for (final String field : FIELDS) {
            reduceOps.add(new BsonField(field, new Document("$first", "$" + field)));
        }
        pipeline.add(Aggregates.group(new Document("$concat", key), reduceOps));
        return true;
    }

    /**
     * Add a step to the end of the current pipeline which prunes the results
     * according to the recorded derivation level of their sources. At least
     * one triple that was used to construct the result must have a derivation
     * level at least as high as the parameter, indicating that it was derived
     * via that many steps from the original data. (A value of zero is
     * equivalent to input data that was not derived at all.) Use in
     * conjunction with getTriplePipeline (which sets the source level for
     * generated triples) to avoid repeatedly deriving the same results.
     * @param requiredLevel Required derivation depth. Reject a solution to the
     *  query if all of the triples involved in producing that solution have a
     *  lower derivation depth than this. If zero, does nothing.
     */
    public void requireSourceDerivationDepth(final int requiredLevel) {
        if (requiredLevel > 0) {
            pipeline.add(Aggregates.match(new Document(LEVEL,
                    new Document("$gte", requiredLevel))));
        }
    }
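
    /*
     * For example (level value hypothetical), requireSourceDerivationDepth(2)
     * appends:
     *
     *   { "$match": { "derivation_level": { "$gte": 2 } } }
     *
     * so only solutions built from at least one twice-derived triple survive.
     */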
    /**
     * Add a step to the end of the current pipeline which prunes the results
     * according to the timestamps of their sources. At least one triple that
     * was used to construct the result must have a timestamp at least as
     * recent as the parameter. Use in iterative applications to avoid deriving
     * solutions that would have been generated in an earlier iteration.
     * @param t Minimum required timestamp. Reject a solution to the query if
     *  all of the triples involved in producing that solution have an earlier
     *  timestamp than this.
     */
    public void requireSourceTimestamp(final long t) {
        pipeline.add(Aggregates.match(new Document(TIMESTAMP, new Document("$gte", t))));
    }
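
    /*
     * Sketch of iterative materialization (hypothetical driver code, not part
     * of this class):
     *
     *   long lastRun = ...;  // timestamp recorded at the previous iteration
     *   node.requireSourceTimestamp(lastRun);
     *   List<Bson> stages = node.getTriplePipeline(System.currentTimeMillis(), true);
     *   // Run 'stages' against the collection, insert the resulting documents
     *   // as new triples, and repeat until no new triples appear.
     *
     * This pairs the timestamp guard above with the triple-producing pipeline
     * below so each round only derives triples that earlier rounds could not.
     */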
    /**
     * Given that the current state of the pipeline produces data that can be
     * interpreted as triples, add a project step to map each result from the
     * intermediate result structure to a structure that can be stored in the
     * triple store. Does not modify the internal pipeline, which will still
     * produce intermediate results suitable for query evaluation.
     * @param timestamp Attach this timestamp to the resulting triples.
     * @param requireNew If true, add an additional step to check constructed
     *  triples against existing triples and only include new ones in the
     *  result. Adds a potentially expensive $lookup step.
     * @return A copy of the internal pipeline with triple-producing steps
     *  appended.
     * @throws IllegalStateException if the results produced by the current
     *  pipeline do not have variable names allowing them to be interpreted as
     *  triples (i.e. "subject", "predicate", and "object").
     */
    public List<Bson> getTriplePipeline(final long timestamp, final boolean requireNew) {
        if (!assuredBindingNames.contains(SUBJECT)
                || !assuredBindingNames.contains(PREDICATE)
                || !assuredBindingNames.contains(OBJECT)) {
            throw new IllegalStateException("Current pipeline does not produce "
                    + "records that can be converted into triples.\n"
                    + "Required variable names: <" + SUBJECT + ", " + PREDICATE
                    + ", " + OBJECT + ">\nCurrent variable names: " + assuredBindingNames);
        }
        final List<Bson> triplePipeline = new LinkedList<>(pipeline);
        final List<Bson> fields = new LinkedList<>();
        fields.add(Projections.computed(SUBJECT, valueFieldExpr(SUBJECT)));
        fields.add(Projections.computed(SUBJECT_HASH, hashFieldExpr(SUBJECT)));
        fields.add(Projections.computed(PREDICATE, valueFieldExpr(PREDICATE)));
        fields.add(Projections.computed(PREDICATE_HASH, hashFieldExpr(PREDICATE)));
        fields.add(Projections.computed(OBJECT, valueFieldExpr(OBJECT)));
        fields.add(Projections.computed(OBJECT_HASH, hashFieldExpr(OBJECT)));
        fields.add(Projections.computed(OBJECT_TYPE,
                ConditionalOperators.ifNull(typeFieldExpr(OBJECT), DEFAULT_TYPE)));
        fields.add(Projections.computed(OBJECT_LANGUAGE, hashFieldExpr(OBJECT)));
        fields.add(Projections.computed(CONTEXT, DEFAULT_CONTEXT));
        fields.add(Projections.computed(STATEMENT_METADATA, DEFAULT_METADATA));
        fields.add(DEFAULT_DV);
        fields.add(Projections.computed(TIMESTAMP, new Document("$literal", timestamp)));
        fields.add(Projections.computed(LEVEL, new Document("$add",
                Arrays.asList("$" + LEVEL, 1))));
        triplePipeline.add(Aggregates.project(Projections.fields(fields)));
        if (requireNew) {
            // Prune any triples that already exist in the data store
            final String collectionName = collection.getNamespace().getCollectionName();
            final Bson includeAll = Projections.include(SUBJECT, SUBJECT_HASH,
                    PREDICATE, PREDICATE_HASH, OBJECT, OBJECT_HASH,
                    OBJECT_TYPE, OBJECT_LANGUAGE, CONTEXT, STATEMENT_METADATA,
                    DOCUMENT_VISIBILITY, TIMESTAMP, LEVEL);
            final List<Bson> eqTests = new LinkedList<>();
            eqTests.add(new Document("$eq",
                    Arrays.asList("$$this." + PREDICATE_HASH, "$" + PREDICATE_HASH)));
            eqTests.add(new Document("$eq",
                    Arrays.asList("$$this." + OBJECT_HASH, "$" + OBJECT_HASH)));
            final Bson redundantFilter = new Document("$filter",
                    new Document("input", "$" + JOINED_TRIPLE)
                            .append("as", "this")
                            .append("cond", new Document("$and", eqTests)));
            triplePipeline.add(Aggregates.lookup(collectionName,
                    SUBJECT_HASH, SUBJECT_HASH, JOINED_TRIPLE));
            final String numRedundant = "REDUNDANT";
            triplePipeline.add(Aggregates.project(Projections.fields(includeAll,
                    Projections.computed(numRedundant,
                            new Document("$size", redundantFilter)))));
            triplePipeline.add(Aggregates.match(Filters.eq(numRedundant, 0)));
            triplePipeline.add(Aggregates.project(Projections.fields(includeAll)));
        }
        return triplePipeline;
    }
}