List of usage examples for com.mongodb.bulk.BulkWriteError.getCode()
public int getCode()
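getCode() returns the raw server error code for one failed write in a bulk operation. Both examples below branch on it to detect duplicate-key failures (server code 11000). As a quick orientation before the full listings, here is a minimal sketch against the plain MongoDB Java driver; the collection and documents are hypothetical, and only the driver calls (bulkWrite, getWriteErrors, getCode, ErrorCategory.fromErrorCode) are real API:

    import com.mongodb.ErrorCategory;
    import com.mongodb.MongoBulkWriteException;
    import com.mongodb.bulk.BulkWriteError;
    import com.mongodb.client.MongoCollection;
    import com.mongodb.client.model.InsertOneModel;
    import org.bson.Document;

    import java.util.Arrays;

    public class GetCodeExample {
        // Hypothetical helper: inserts two documents with the same _id and
        // ignores the resulting duplicate-key error, rethrowing anything else.
        static void insertIgnoringDuplicates(MongoCollection<Document> collection) {
            try {
                collection.bulkWrite(Arrays.asList(
                        new InsertOneModel<>(new Document("_id", "a")),
                        new InsertOneModel<>(new Document("_id", "a")))); // duplicate _id
            } catch (MongoBulkWriteException e) {
                for (BulkWriteError writeError : e.getWriteErrors()) {
                    // 11000 is the duplicate-key code; mapping the raw code through
                    // ErrorCategory is the more robust check (see the second example below).
                    if (ErrorCategory.fromErrorCode(writeError.getCode()) == ErrorCategory.DUPLICATE_KEY) {
                        System.out.println("Ignoring duplicate key at index " + writeError.getIndex()
                                + " (code " + writeError.getCode() + ")");
                    } else {
                        throw e; // unexpected write error
                    }
                }
            }
        }
    }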
From source file:org.opencb.opencga.storage.mongodb.variant.adaptors.VariantMongoDBAdaptor.java
License:Apache License
/**
 * Two-step insertion:
 * First, check that the variant and study exist by making an update.
 * For those that don't exist, push a study with the file and genotype information.
 * <p>
 * The documents that throw a "dup key" exception are those variants that already exist and have the study.
 * Then, only for those variants, make a second update.
 * <p>
 * An interesting idea would be to invert these actions depending on the number of already inserted variants.
 *
 * @param data                        Variants to insert
 * @param fileId                      File ID
 * @param variantConverter            Variant converter to be used
 * @param variantSourceEntryConverter Variant source converter to be used
 * @param studyConfiguration          Configuration for the study
 * @param loadedSampleIds             Other loaded sampleIds EXCEPT those that are going to be loaded
 * @return QueryResult object
 */
QueryResult<MongoDBVariantWriteResult> insert(List<Variant> data, int fileId,
        DocumentToVariantConverter variantConverter,
        DocumentToStudyVariantEntryConverter variantSourceEntryConverter,
        StudyConfiguration studyConfiguration, List<Integer> loadedSampleIds) {
    MongoDBVariantWriteResult writeResult = new MongoDBVariantWriteResult();
    long startTime = System.currentTimeMillis();
    if (data.isEmpty()) {
        return new QueryResult<>("insertVariants", 0, 1, 1, "", "", Collections.singletonList(writeResult));
    }
    List<Bson> queries = new ArrayList<>(data.size());
    List<Bson> updates = new ArrayList<>(data.size());
    // Use a multiset instead of a normal set, to keep track of duplicated variants
    Multiset<String> nonInsertedVariants = HashMultiset.create();
    String fileIdStr = Integer.toString(fileId);

    // List<String> extraFields = studyConfiguration.getAttributes().getAsStringList(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key());
    boolean excludeGenotypes = studyConfiguration.getAttributes().getBoolean(
            VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(),
            VariantStorageEngine.Options.EXCLUDE_GENOTYPES.defaultValue());

    long nanoTime = System.nanoTime();
    Map missingSamples = Collections.emptyMap();
    String defaultGenotype = studyConfiguration.getAttributes().getString(DEFAULT_GENOTYPE.key(), "");
    if (defaultGenotype.equals(DocumentToSamplesConverter.UNKNOWN_GENOTYPE)) {
        logger.debug("No need to fill gaps. DefaultGenotype is UNKNOWN_GENOTYPE({}).",
                DocumentToSamplesConverter.UNKNOWN_GENOTYPE);
    } else if (excludeGenotypes) {
        logger.debug("No need to fill gaps. Excluding genotypes.");
    } else if (!loadedSampleIds.isEmpty()) {
        missingSamples = new Document(DocumentToSamplesConverter.UNKNOWN_GENOTYPE, loadedSampleIds); // ?/?
    }
    // List<Object> missingOtherValues = new ArrayList<>(loadedSampleIds.size());
    // for (int i = 0; i < loadedSampleIds.size(); i++) {
    //     missingOtherValues.add(DBObjectToSamplesConverter.UNKNOWN_FIELD);
    // }

    for (Variant variant : data) {
        if (variant.getType().equals(VariantType.NO_VARIATION)) {
            // Storage-MongoDB is not able to store NON VARIANTS
            writeResult.setSkippedVariants(writeResult.getSkippedVariants() + 1);
            continue;
        } else if (variant.getType().equals(VariantType.SYMBOLIC)) {
            logger.warn("Skip symbolic variant " + variant.toString());
            writeResult.setSkippedVariants(writeResult.getSkippedVariants() + 1);
            continue;
        }
        String id = variantConverter.buildStorageId(variant);
        for (StudyEntry studyEntry : variant.getStudies()) {
            if (studyEntry.getFiles().size() == 0
                    || !studyEntry.getFiles().get(0).getFileId().equals(fileIdStr)) {
                continue;
            }
            int studyId = studyConfiguration.getStudyId();
            Document study = variantSourceEntryConverter.convertToStorageType(variant, studyEntry);
            Document genotypes = study.get(DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD, Document.class);
            if (genotypes != null) { // If genotypes is null, genotypes are not supposed to be loaded
                genotypes.putAll(missingSamples); // Add missing samples
                // for (String extraField : extraFields) {
                //     List<Object> otherFieldValues = (List<Object>) study.get(extraField.toLowerCase());
                //     otherFieldValues.addAll(0, missingOtherValues);
                // }
            }
            Document push = new Document(DocumentToVariantConverter.STUDIES_FIELD, study);
            Document update = new Document()
                    .append("$push", push)
                    .append("$setOnInsert", variantConverter.convertToStorageType(variant));
            if (variant.getIds() != null && !variant.getIds().isEmpty()
                    && !variant.getIds().iterator().next().isEmpty()) {
                update.put("$addToSet", new Document(DocumentToVariantConverter.IDS_FIELD,
                        new Document("$each", variant.getIds())));
            }
            // { _id: <variant_id>, "studies.sid": { $ne: <studyId> } }
            // If the variant exists and contains the study, this find will fail, the upsert will be
            // attempted, and a duplicated key exception will be thrown.
            queries.add(new Document("_id", id).append(
                    DocumentToVariantConverter.STUDIES_FIELD + "."
                            + DocumentToStudyVariantEntryConverter.STUDYID_FIELD,
                    new Document("$ne", studyId)));
            updates.add(update);
        }
    }

    if (!queries.isEmpty()) {
        QueryOptions options = new QueryOptions(UPSERT, true);
        options.put(MULTI, false);
        int newDocuments;
        int updatedObjects;
        try {
            BulkWriteResult bulkWriteResult;
            bulkWriteResult = variantsCollection.update(queries, updates, options).first();
            newDocuments = bulkWriteResult.getUpserts().size();
            updatedObjects = bulkWriteResult.getModifiedCount();
        } catch (MongoBulkWriteException e) {
            BulkWriteResult bulkWriteResult;
            bulkWriteResult = e.getWriteResult();
            newDocuments = bulkWriteResult.getUpserts().size();
            updatedObjects = bulkWriteResult.getModifiedCount();
            for (BulkWriteError writeError : e.getWriteErrors()) {
                if (writeError.getCode() == 11000) { // Duplicate key error code
                    Matcher matcher = writeResultErrorPattern.matcher(writeError.getMessage());
                    if (matcher.find()) {
                        String id = matcher.group(1);
                        nonInsertedVariants.add(id);
                    } else {
                        throw e;
                    }
                } else {
                    throw e;
                }
            }
        }
        writeResult.setNewVariants(newDocuments);
        writeResult.setUpdatedVariants(updatedObjects);
        // writeResult.setNewDocuments(data.size() - nonInsertedVariants.size() - writeResult.getSkippedVariants());
        queries.clear();
        updates.clear();
    }
    writeResult.setNewVariantsNanoTime(System.nanoTime() - nanoTime);
    nanoTime = System.nanoTime();

    for (Variant variant : data) {
        variant.setAnnotation(null);
        String id = variantConverter.buildStorageId(variant);
        if (nonInsertedVariants != null && !nonInsertedVariants.contains(id)) {
            continue; // Already inserted variant
        }
        for (StudyEntry studyEntry : variant.getStudies()) {
            if (studyEntry.getFiles().size() == 0
                    || !studyEntry.getFiles().get(0).getFileId().equals(fileIdStr)) {
                continue;
            }
            Document studyObject = variantSourceEntryConverter.convertToStorageType(variant, studyEntry);
            Document genotypes = studyObject.get(DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD,
                    Document.class);
            Document push = new Document();
            if (!excludeGenotypes) {
                if (genotypes != null) { // If genotypes is null, genotypes are not supposed to be loaded
                    for (String genotype : genotypes.keySet()) {
                        push.put(DocumentToVariantConverter.STUDIES_FIELD + ".$."
                                        + DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD + "." + genotype,
                                new Document("$each", genotypes.get(genotype)));
                    }
                    // for (String extraField : extraFields) {
                    //     List values = (List) studyObject.get(extraField.toLowerCase());
                    //     push.put(DBObjectToVariantConverter.STUDIES_FIELD + ".$." + extraField.toLowerCase(),
                    //             new Document("$each", values).append("$position", loadedSampleIds.size()));
                    // }
                } else {
                    push.put(DocumentToVariantConverter.STUDIES_FIELD + ".$."
                            + DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD, Collections.emptyMap());
                }
            }
            push.put(DocumentToVariantConverter.STUDIES_FIELD + ".$."
                            + DocumentToStudyVariantEntryConverter.FILES_FIELD,
                    ((List) studyObject.get(DocumentToStudyVariantEntryConverter.FILES_FIELD)).get(0));
            Document update = new Document(new Document("$push", push));
            queries.add(new Document("_id", id)
                    .append(DocumentToVariantConverter.STUDIES_FIELD + '.'
                            + DocumentToStudyVariantEntryConverter.STUDYID_FIELD, studyConfiguration.getStudyId())
                    .append(DocumentToVariantConverter.STUDIES_FIELD + '.'
                            + DocumentToStudyVariantEntryConverter.FILES_FIELD + '.'
                            + DocumentToStudyVariantEntryConverter.FILEID_FIELD, new Document("$ne", fileId)));
            updates.add(update);
        }
    }
    writeResult.setExistingVariantsNanoTime(System.nanoTime() - nanoTime);

    if (!queries.isEmpty()) {
        QueryOptions options = new QueryOptions(UPSERT, false);
        options.put(MULTI, false);
        QueryResult<BulkWriteResult> update = variantsCollection.update(queries, updates, options);
        // nonInsertedVariantsNum may differ from queries.size() and nonInsertedVariants.size() if there
        // was a duplicated variant.
        writeResult.setNonInsertedVariants(nonInsertedVariants.size() - update.first().getMatchedCount());
        writeResult.setUpdatedVariants(writeResult.getUpdatedVariants() + update.first().getModifiedCount());
    }

    return new QueryResult<>("insertVariants", ((int) (System.currentTimeMillis() - startTime)), 1, 1, "", "",
            Collections.singletonList(writeResult));
}
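Note that the writeResultErrorPattern field used above is declared elsewhere in the class and is not part of this snippet. For illustration only, a plausible definition would extract the duplicated _id from the server's error message; the exact message format varies across MongoDB server versions, so the regex below is an assumption, not the project's actual pattern:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    class DupKeyMessageParser {
        // Assumed message shape, e.g.:
        //   E11000 duplicate key error collection: db.variants index: _id_ dup key: { : "1:1000:A:T" }
        // Illustrative regex; verify against the error messages of your server version.
        private static final Pattern WRITE_RESULT_ERROR_PATTERN =
                Pattern.compile("dup key: \\{ : \"([^\"]+)\" \\}");

        static String extractDuplicatedId(String errorMessage) {
            Matcher matcher = WRITE_RESULT_ERROR_PATTERN.matcher(errorMessage);
            return matcher.find() ? matcher.group(1) : null; // null if the format is unrecognized
        }
    }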
From source file:org.opencb.opencga.storage.mongodb.variant.load.stage.MongoDBVariantStageLoader.java
License:Apache License
/**
 * Given a map of id -> binary[], inserts the binary objects in the stage collection.
 *
 * {
 *     <studyId> : {
 *         <fileId> : [ BinData(), BinData() ]
 *     }
 * }
 *
 * The field <fileId> is an array to detect duplicated variants within the same file.
 *
 * It may happen that an update with upsert:true fails if two different threads try to
 * update the same non-existing document.
 * See https://jira.mongodb.org/browse/SERVER-14322
 *
 * In that case, the non-inserted values will be returned.
 *
 * @param values   Map with all the values to insert
 * @param result   MongoDBVariantWriteResult to fill
 * @param retryIds List of IDs to retry. If not null, only documents within this set will be updated
 * @return List of non-updated documents
 * @throws MongoBulkWriteException if the exception was not a DuplicatedKeyException (e:11000)
 */
private Set<String> updateMongo(ListMultimap<Document, Binary> values, MongoDBVariantWriteResult result,
        Set<String> retryIds) {
    Set<String> nonInsertedIds = Collections.emptySet();
    if (values.isEmpty()) {
        return nonInsertedIds;
    }

    List<Bson> queries = new LinkedList<>();
    List<Bson> updates = new LinkedList<>();
    for (Document id : values.keySet()) {
        if (retryIds == null || retryIds.contains(id.getString("_id"))) {
            List<Binary> binaryList = values.get(id);
            queries.add(eq("_id", id.getString("_id")));
            if (binaryList.size() == 1) {
                updates.add(combine(
                        resumeStageLoad ? addToSet(fieldName, binaryList.get(0))
                                        : push(fieldName, binaryList.get(0)),
                        setOnInsert(END_FIELD, id.get(END_FIELD)),
                        setOnInsert(REF_FIELD, id.get(REF_FIELD)),
                        setOnInsert(ALT_FIELD, id.get(ALT_FIELD))));
            } else {
                updates.add(combine(
                        resumeStageLoad ? addEachToSet(fieldName, binaryList)
                                        : pushEach(fieldName, binaryList),
                        setOnInsert(END_FIELD, id.get(END_FIELD)),
                        setOnInsert(REF_FIELD, id.get(REF_FIELD)),
                        setOnInsert(ALT_FIELD, id.get(ALT_FIELD))));
            }
        }
    }

    try {
        final BulkWriteResult mongoResult = collection.update(queries, updates, QUERY_OPTIONS).first();
        result.setNewVariants(mongoResult.getInsertedCount())
                .setUpdatedVariants(mongoResult.getModifiedCount());
    } catch (MongoBulkWriteException e) {
        result.setNewVariants(e.getWriteResult().getInsertedCount())
                .setUpdatedVariants(e.getWriteResult().getModifiedCount());
        if (retryIds != null) {
            // If retryIds != null, this was the second attempt to update. In this case, do fail.
            LOGGER.error("BulkWriteErrors when retrying the updates");
            throw e;
        }

        nonInsertedIds = new HashSet<>();
        for (BulkWriteError writeError : e.getWriteErrors()) {
            if (ErrorCategory.fromErrorCode(writeError.getCode()).equals(ErrorCategory.DUPLICATE_KEY)) {
                // Duplicate key error code
                Matcher matcher = DUP_KEY_WRITE_RESULT_ERROR_PATTERN.matcher(writeError.getMessage());
                if (matcher.find()) {
                    String id = matcher.group(1);
                    nonInsertedIds.add(id);
                    LOGGER.warn("Catch error : {}", writeError.toString());
                    LOGGER.warn("DupKey exception inserting '{}'. Retry!", id);
                } else {
                    LOGGER.error("WriteError with code {} does not match with the pattern {}",
                            writeError.getCode(), DUP_KEY_WRITE_RESULT_ERROR_PATTERN.pattern());
                    throw e;
                }
            } else {
                throw e;
            }
        }
    }
    return nonInsertedIds;
}
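The retryIds parameter exists because of the upsert race described in the javadoc: a first call may return the IDs that lost the race, and a second call retries only those. A minimal sketch of how a caller might drive that retry, assuming the enclosing class of the method above (loadWithRetry is a hypothetical name):

    // Caller-side retry for the upsert race (SERVER-14322). updateMongo is the
    // method shown above; values and result have the same types as its parameters.
    private void loadWithRetry(ListMultimap<Document, Binary> values, MongoDBVariantWriteResult result) {
        Set<String> nonInserted = updateMongo(values, result, null); // first attempt
        if (!nonInserted.isEmpty()) {
            // The second attempt only touches the documents that raced; by now they
            // exist, so the updates match instead of upserting. A second failure
            // propagates as MongoBulkWriteException.
            updateMongo(values, result, nonInserted);
        }
    }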