/*
 Copyright 2013 Red Hat, Inc. and/or its affiliates.

 This file is part of lightblue.

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package com.redhat.lightblue.mongo.crud;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.node.JsonNodeFactory;

import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.ReadPreference;
import com.mongodb.WriteConcern;

import com.redhat.lightblue.crud.CRUDOperation;
import com.redhat.lightblue.crud.CRUDOperationContext;
import com.redhat.lightblue.crud.CRUDUpdateResponse;
import com.redhat.lightblue.crud.ConstraintValidator;
import com.redhat.lightblue.crud.CrudConstants;
import com.redhat.lightblue.crud.DocCtx;
import com.redhat.lightblue.crud.ListDocumentStream;
import com.redhat.lightblue.eval.FieldAccessRoleEvaluator;
import com.redhat.lightblue.eval.Projector;
import com.redhat.lightblue.eval.Updater;
import com.redhat.lightblue.interceptor.InterceptPoint;
import com.redhat.lightblue.metadata.EntityMetadata;
import com.redhat.lightblue.metadata.PredefinedFields;
import com.redhat.lightblue.metadata.Type;
import com.redhat.lightblue.query.QueryExpression;
import com.redhat.lightblue.util.Error;
import com.redhat.lightblue.util.JsonDoc;
import com.redhat.lightblue.util.JsonUtils;
import com.redhat.lightblue.util.Measure;
import com.redhat.lightblue.util.MemoryMonitor;
import com.redhat.lightblue.util.MemoryMonitor.ThresholdMonitor;
import com.redhat.lightblue.util.Path;

/**
 * Non-atomic updater that evaluates the query and updates the documents one
 * by one.
 */
public class IterateAndUpdate implements DocUpdater {

    private static final Logger LOGGER = LoggerFactory.getLogger(IterateAndUpdate.class);
    private static final Logger METRICS = LoggerFactory.getLogger("metrics." + IterateAndUpdate.class.getName());
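
    // Design note (inferred from reading this class, not stated elsewhere in the
    // file): writes are buffered and flushed in batches of batchSize through a
    // BatchUpdate implementation. When concurrent modification detection is
    // enabled, the inner protocol below supplies reapplyChanges(), which the
    // safe-update base protocol is presumably expected to call to re-read a
    // conflicting document and reapply the update expression to it.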
    private final int batchSize;
    private final JsonNodeFactory nodeFactory;
    private final ConstraintValidator validator;
    private final FieldAccessRoleEvaluator roleEval;
    private final DocTranslator translator;
    private final Updater updater;
    private final Projector projector;
    private final Projector errorProjector;
    private final WriteConcern writeConcern;
    private final ConcurrentModificationDetectionCfg concurrentModificationDetection;

    private class MongoSafeUpdateProtocolForUpdate extends MongoSafeUpdateProtocol {
        private final EntityMetadata md;
        private final Measure measure;
        private final BsonMerge merge;

        public MongoSafeUpdateProtocolForUpdate(DBCollection collection,
                                                WriteConcern writeConcern,
                                                DBObject query,
                                                ConcurrentModificationDetectionCfg cfg,
                                                EntityMetadata md,
                                                Measure measure) {
            super(collection, writeConcern, query, cfg);
            this.md = md;
            this.measure = measure;
            this.merge = new BsonMerge(md);
        }

        @Override
        protected DBObject reapplyChanges(int docIndex, DBObject doc) {
            DocTranslator.TranslatedDoc jsonDoc = translator.toJson(doc);
            // We are bypassing validation here
            if (!updateDoc(md, jsonDoc.doc, measure)) {
                return null;
            }
            return translate(md, jsonDoc.doc, doc, merge, measure).doc;
        }
    }

    public IterateAndUpdate(JsonNodeFactory nodeFactory,
                            ConstraintValidator validator,
                            FieldAccessRoleEvaluator roleEval,
                            DocTranslator translator,
                            Updater updater,
                            Projector projector,
                            Projector errorProjector,
                            WriteConcern writeConcern,
                            int batchSize,
                            ConcurrentModificationDetectionCfg concurrentModificationDetection) {
        this.nodeFactory = nodeFactory;
        this.validator = validator;
        this.roleEval = roleEval;
        this.translator = translator;
        this.updater = updater;
        this.projector = projector;
        this.errorProjector = errorProjector;
        this.writeConcern = writeConcern;
        this.batchSize = batchSize;
        this.concurrentModificationDetection = concurrentModificationDetection;
    }

    MemoryMonitor<DocCtx> memoryMonitor = null;

    public void setResultSizeThresholds(int maxResultSetSizeB, int warnResultSetSizeB, final QueryExpression forQuery) {
        this.memoryMonitor = new MemoryMonitor<>((doc) -> {
            int size = JsonUtils.size(doc.getRoot());
            // account for docs copied by DocCtx.startModifications()
            if (doc.getOriginalDocument() != null) {
                size += JsonUtils.size(doc.getOriginalDocument().getRoot());
            }
            if (doc.getUpdatedDocument() != null) {
                size += JsonUtils.size(doc.getUpdatedDocument().getRoot());
            }
            return size;
        });

        memoryMonitor.registerMonitor(new ThresholdMonitor<DocCtx>(maxResultSetSizeB, (current, threshold, doc) -> {
            throw Error.get(MongoCrudConstants.ERROR_RESULT_SIZE_TOO_LARGE, current + "B > " + threshold + "B");
        }));

        memoryMonitor.registerMonitor(new ThresholdMonitor<DocCtx>(warnResultSetSizeB, (current, threshold, doc) -> {
            LOGGER.warn("{}: query={}, responseDataSizeB={}", MongoCrudConstants.WARN_RESULT_SIZE_LARGE, forQuery, current);
        }));
    }

    private BatchUpdate getUpdateProtocol(CRUDOperationContext ctx,
                                          DBCollection collection,
                                          DBObject query,
                                          EntityMetadata md,
                                          Measure measure) {
        if (ctx.isUpdateIfCurrent()) {
            // Retrieve doc versions from the context
            Type type = md.resolve(DocTranslator.ID_PATH).getType();
            Set<DocIdVersion> docVersions = DocIdVersion.getDocIdVersions(ctx.getUpdateDocumentVersions(), type);
            UpdateIfSameProtocol uis = new UpdateIfSameProtocol(collection, writeConcern);
            uis.addVersions(docVersions);
            return uis;
        } else {
            return new MongoSafeUpdateProtocolForUpdate(collection, writeConcern, query,
                                                        concurrentModificationDetection, md, measure);
        }
    }
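
    // A minimal sketch of how the two monitors registered in
    // setResultSizeThresholds() above cooperate, assuming only the
    // MemoryMonitor/ThresholdMonitor API already used in this file: apply()
    // accumulates each document's measured size and fires a monitor's callback
    // once its threshold is crossed. The hardLimitB/warnLimitB names are
    // illustrative, not part of the API.
    //
    //   MemoryMonitor<DocCtx> mm = new MemoryMonitor<>(d -> JsonUtils.size(d.getRoot()));
    //   mm.registerMonitor(new ThresholdMonitor<DocCtx>(hardLimitB, (current, threshold, d) -> {
    //       throw Error.get(MongoCrudConstants.ERROR_RESULT_SIZE_TOO_LARGE, current + "B > " + threshold + "B");
    //   }));
    //   mm.registerMonitor(new ThresholdMonitor<DocCtx>(warnLimitB, (current, threshold, d) ->
    //       LOGGER.warn("large result set: {}B", current)));
    //   for (DocCtx d : docs) {
    //       mm.apply(d); // throws once hardLimitB is exceeded
    //   }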
    // used for testing
    protected void preCommit() {
    }

    @Override
    public void update(CRUDOperationContext ctx,
                       DBCollection collection,
                       EntityMetadata md,
                       CRUDUpdateResponse response,
                       DBObject query) {
        LOGGER.debug("iterateUpdate: start");
        LOGGER.debug("Computing the result set for {}", query);
        Measure measure = new Measure();
        BatchUpdate sup = getUpdateProtocol(ctx, collection, query, md, measure);
        DBCursor cursor = null;
        int docIndex = 0;
        int numMatched = 0;
        int numUpdated = 0;
        int numFailed = 0;
        BsonMerge merge = new BsonMerge(md);
        List<DocCtx> docUpdateAttempts = new ArrayList<>();
        List<DocCtx> resultDocs = new ArrayList<>();
        ctx.setInputDocuments(resultDocs);
        try {
            ctx.getFactory().getInterceptors().callInterceptors(InterceptPoint.PRE_CRUD_UPDATE_RESULTSET, ctx);
            measure.begin("collection.find");
            cursor = collection.find(query, null);
            // Read from primary for read-for-update operations
            cursor.setReadPreference(ReadPreference.primary());
            measure.end("collection.find");
            LOGGER.debug("Found {} documents", cursor.count());
            // read-update-write
            measure.begin("iteration");
            int batchStartIndex = 0; // docUpdateAttempts[batchStartIndex] is the first doc in this batch
            // TODO: This code is very messy and probably has several logic bugs. I do not have time to fix it.
            // Things I noticed:
            // 1. numFailed is not updated consistently. Depending on where failure occurs, it may not be updated!
            // 2. resultDocs are not updated consistently. Depending on the branch, the document may not end up in
            //    the response. It is not clear from reading the code when it's expected to be in the response or not.
            //    I know from some failing tests in dependent services that at least some cases are bugged.
            // The amount of branching needs to be toned down, and low level state fiddling needs to be better
            // abstracted so it can be expressed in fewer places.
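            // Batch bookkeeping in the loop below (explanation inferred from the
            // arithmetic): docUpdateAttempts accumulates every doc we attempt to
            // write across the whole cursor, while batchStartIndex marks where the
            // current, not-yet-committed batch begins. CommitInfo.errors is keyed
            // by position within the batch, so a failed doc is located at
            // docUpdateAttempts.get(errorKey + batchStartIndex). For example, with
            // batchSize=2, after one committed batch batchStartIndex == 2, and an
            // error keyed 1 in the next commit refers to docUpdateAttempts.get(3).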
            while (cursor.hasNext()) {
                DBObject document = cursor.next();
                numMatched++;
                boolean hasErrors = false;
                LOGGER.debug("Retrieved doc {}", docIndex);

                measure.begin("ctx.addDocument");
                DocTranslator.TranslatedDoc translatedDoc = translator.toJson(document);
                DocCtx doc = new DocCtx(translatedDoc.doc, translatedDoc.rmd);
                doc.startModifications();
                measure.end("ctx.addDocument");

                // From now on: doc contains the working copy, and doc.originalDoc contains the original copy
                if (updateDoc(md, doc, measure)) {
                    LOGGER.debug("Document {} modified, updating", docIndex);
                    ctx.getFactory().getInterceptors()
                            .callInterceptors(InterceptPoint.PRE_CRUD_UPDATE_DOC_VALIDATION, ctx, doc);
                    LOGGER.debug("Running constraint validations");
                    measure.begin("validation");
                    validator.clearErrors();
                    validator.validateDoc(doc);
                    measure.end("validation");
                    List<Error> errors = validator.getErrors();
                    if (errors != null && !errors.isEmpty()) {
                        ctx.addErrors(errors);
                        hasErrors = true;
                        LOGGER.debug("Doc has errors");
                    }
                    errors = validator.getDocErrors().get(doc);
                    if (errors != null && !errors.isEmpty()) {
                        doc.addErrors(errors);
                        hasErrors = true;
                        LOGGER.debug("Doc has data errors");
                    }
                    if (!hasErrors) {
                        hasErrors = accessCheck(doc, measure);
                    }
                    if (!hasErrors) {
                        try {
                            ctx.getFactory().getInterceptors().callInterceptors(InterceptPoint.PRE_CRUD_UPDATE_DOC, ctx, doc);
                            DocTranslator.TranslatedBsonDoc updatedObject = translate(md, doc, document, merge, measure);
                            sup.addDoc(updatedObject.doc);
                            docUpdateAttempts.add(doc);
                            // update in batches
                            if (docUpdateAttempts.size() - batchStartIndex >= batchSize) {
                                preCommit();
                                measure.begin("bulkUpdate");
                                BatchUpdate.CommitInfo ci = sup.commit();
                                measure.end("bulkUpdate");
                                for (Map.Entry<Integer, Error> entry : ci.errors.entrySet()) {
                                    docUpdateAttempts.get(entry.getKey() + batchStartIndex).addError(entry.getValue());
                                }
                                numFailed += ci.errors.size();
                                numUpdated += docUpdateAttempts.size() - batchStartIndex - ci.errors.size() - ci.lostDocs.size();
                                numMatched -= ci.lostDocs.size();
                                batchStartIndex = docUpdateAttempts.size();
                                int di = 0;
                                // Only add the docs that were not lost
                                for (DocCtx d : docUpdateAttempts) {
                                    if (!ci.lostDocs.contains(di)) {
                                        enforceMemoryLimit(d);
                                        resultDocs.add(d);
                                    }
                                    di++;
                                }
                            }
                            doc.setCRUDOperationPerformed(CRUDOperation.UPDATE);
                            doc.setUpdatedDocument(doc);
                        } catch (Error e) {
                            if (MongoCrudConstants.ERROR_RESULT_SIZE_TOO_LARGE.equals(e.getErrorCode())) {
                                throw e;
                            } else {
                                LOGGER.warn("Update exception for document {}: {}", docIndex, e);
                                doc.addError(Error.get(MongoCrudConstants.ERR_UPDATE_ERROR, e.toString()));
                                hasErrors = true;
                            }
                        } catch (Exception e) {
                            LOGGER.warn("Update exception for document {}: {}", docIndex, e);
                            doc.addError(Error.get(MongoCrudConstants.ERR_UPDATE_ERROR, e.toString()));
                            hasErrors = true;
                        }
                    } else {
                        numFailed++;
                        resultDocs.add(doc);
                    }
                } else {
                    LOGGER.debug("Document {} was not modified", docIndex);
                    resultDocs.add(doc);
                }
                if (hasErrors) {
                    LOGGER.debug("Document {} has errors", docIndex);
                    doc.setOutputDocument(errorProjector.project(doc, nodeFactory));
                } else if (projector != null) {
                    LOGGER.debug("Projecting document {}", docIndex);
                    doc.setOutputDocument(projector.project(doc, nodeFactory));
                }
                docIndex++;
            }
            measure.end("iteration");
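            // Accounting example for the tail commit below (illustrative numbers):
            // if 5 docs were attempted since batchStartIndex, 1 comes back in
            // ci.errors and 1 in ci.lostDocs, then this commit adds 1 to numFailed,
            // 3 to numUpdated (5 - 1 error - 1 lost), and decrements numMatched by
            // 1 for the lost doc, which is also excluded from resultDocs.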
            // if we have any remaining items to update
            if (docUpdateAttempts.size() > batchStartIndex) {
                preCommit();
                BatchUpdate.CommitInfo ci = sup.commit();
                for (Map.Entry<Integer, Error> entry : ci.errors.entrySet()) {
                    docUpdateAttempts.get(entry.getKey() + batchStartIndex).addError(entry.getValue());
                }
                numFailed += ci.errors.size();
                numUpdated += docUpdateAttempts.size() - batchStartIndex - ci.errors.size() - ci.lostDocs.size();
                numMatched -= ci.lostDocs.size();
                int di = 0;
                for (DocCtx d : docUpdateAttempts) {
                    if (!ci.lostDocs.contains(di)) {
                        enforceMemoryLimit(d);
                        resultDocs.add(d);
                    }
                    di++;
                }
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
        ctx.setDocumentStream(new ListDocumentStream<DocCtx>(resultDocs));
        response.setNumUpdated(numUpdated);
        response.setNumFailed(numFailed);
        response.setNumMatched(numMatched);
        METRICS.debug("IterateAndUpdate:\n{}", measure);
    }

    private void enforceMemoryLimit(DocCtx doc) {
        if (memoryMonitor != null) {
            // if the memory threshold is exceeded, this will throw an Error
            memoryMonitor.apply(doc);
            // an Error here means an *inconsistent update operation*:
            // some batches will have been updated, some not;
            // no hooks will fire for the updated batches, and
            // the counts sent to the client will be set to zero
            // TODO: I perceive this as a problem with the updates and hooks impl in general;
            // we need to run hooks per batch (see https://github.com/lightblue-platform/lightblue-mongo/issues/378)
        }
    }

    private boolean updateDoc(EntityMetadata md, JsonDoc doc, Measure measure) {
        if (updater.update(doc, md.getFieldTreeRoot(), Path.EMPTY)) {
            // Remove any nulls from the document
            JsonDoc.filterNulls(doc.getRoot());
            measure.begin("updateArraySizes");
            PredefinedFields.updateArraySizes(md, nodeFactory, doc);
            measure.end("updateArraySizes");
            return true;
        } else {
            return false;
        }
    }

    private DocTranslator.TranslatedBsonDoc translate(EntityMetadata md, JsonDoc doc, DBObject document,
                                                      BsonMerge merge, Measure measure) {
        measure.begin("toBsonAndMerge");
        DocTranslator.TranslatedBsonDoc updatedObject = translator.toBson(doc);
        merge.merge(document, updatedObject.doc);
        measure.end("toBsonAndMerge");
        measure.begin("populateHiddenFields");
        DocTranslator.populateDocHiddenFields(updatedObject.doc, md);
        measure.end("populateHiddenFields");
        return updatedObject;
    }

    // Returns true if there is an access check error
    private boolean accessCheck(DocCtx doc, Measure measure) {
        measure.begin("accessCheck");
        Set<Path> paths = roleEval.getInaccessibleFields_Update(doc, doc.getOriginalDocument());
        measure.end("accessCheck");
        LOGGER.debug("Inaccessible fields during update={}", paths);
        if (paths != null && !paths.isEmpty()) {
            doc.addError(Error.get("update", CrudConstants.ERR_NO_FIELD_UPDATE_ACCESS, paths.toString()));
            return true;
        }
        return false;
    }

    public int getDataSizeB() {
        if (memoryMonitor != null) {
            return memoryMonitor.getDataSizeB();
        } else {
            return 0;
        }
    }
}
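
// A minimal usage sketch, assuming the collaborators are built by the
// surrounding CRUD controller (the wiring below is illustrative, not the
// actual lightblue factory code; the batch size 64 and the threshold values
// are made-up numbers):
//
//   IterateAndUpdate upd = new IterateAndUpdate(nodeFactory, validator, roleEval,
//           translator, updater, projector, errorProjector, writeConcern,
//           64, concurrentModificationDetection);
//   upd.setResultSizeThresholds(maxResultSetSizeB, warnResultSetSizeB, queryExpression);
//   upd.update(ctx, collection, md, response, mongoQuery);
//   // after update(), response carries numUpdated/numFailed/numMatched, and
//   // ctx holds the (projected) result documents as a ListDocumentStream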