List of usage examples for com.mongodb DBCollection update
public WriteResult update(final DBObject query, final DBObject update, final boolean upsert, final boolean multi)
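For orientation, a minimal sketch of this four-argument form (the connection details, database, collection, and field names below are hypothetical): the query selects the documents, the update document either applies operators or replaces the match, upsert controls whether a missing document is created, and multi controls whether every match is updated.

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.WriteResult;

public class UpdateSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical local server, database and collection names
        MongoClient mongo = new MongoClient("localhost", 27017);
        DBCollection col = mongo.getDB("test").getCollection("entity_features");

        DBObject query = new BasicDBObject("index", "acme/company");                   // which documents to match
        DBObject update = new BasicDBObject("$inc", new BasicDBObject("doccount", 1)); // operator-style update

        // upsert=false: do not create the document if nothing matches
        // multi=true:   apply the update to every matching document
        WriteResult result = col.update(query, update, false, true);
        System.out.println("documents affected: " + result.getN());

        mongo.close();
    }
}

The examples that follow are taken from open-source projects and show the same call in context.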
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
/**
 * Updates the feature entries for the list of entities that was just extracted,
 * including changing frequency, adding aliases etc.
 *
 * This method now has 3 steps:
 * 1. Try to update alias
 * 1.a If fail, create new gaz
 * 2. Update totalfreq and doccount
 *
 * @param entFeatures Map of entities to update in the entity feature collection
 */
public static void updateEntityFeatures(Map<String, Map<ObjectId, EntityFeaturePojo>> entFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewEntities = 0;
    long entityAggregationTime = new Date().getTime();
    DBCollection col = DbManager.getFeature().getEntity();
    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;
    String savedSyncTime = null;
    for (Map<ObjectId, EntityFeaturePojo> entCommunity : entFeatures.values()) {
        Iterator<Map.Entry<ObjectId, EntityFeaturePojo>> it = entCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, EntityFeaturePojo> entFeatureKV = it.next();
            try {
                EntityFeaturePojo entFeature = entFeatureKV.getValue();
                long nSavedDocCount = entFeature.getDoccount();
                long nSavedFreqCount = entFeature.getTotalfreq();
                // (these should be constant across all communities but keep it here
                //  so can assign it using entFeature, it's v cheap so no need to get once like for sync vars)

                // For each community, see if the entity feature already exists *for that community*
                ObjectId communityID = entFeature.getCommunityId();
                if (null != communityID) {
                    if ((null == currCache) || !currCache.getCommunityId().equals(entFeatureKV.getKey())) {
                        currCache = CommunityFeatureCaches.getCommunityFeatureCache(entFeatureKV.getKey());
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode) System.out.println(
                                    "EntityAggregationUtils.updateEntityFeatures, Opened cache for community: "
                                            + entFeatureKV.getKey());
                        }
                    } //TESTED (by hand)

                    // Is this in our cache? If so can short cut a bunch of the DB interaction:
                    EntityFeaturePojo cachedEnt = currCache.getCachedEntityFeature(entFeature);
                    if (null != cachedEnt) {
                        if (_incrementalMode) {
                            if (_diagnosticMode) {
                                if (_logInDiagnosticMode) System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, skip cached: "
                                                + cachedEnt.toDb());
                                //TODO (INF-2825): should be continue-ing here (after implementing incremental
                                // caching fully) so can use delta more efficiently...
                            }
                        } else if (_diagnosticMode) {
                            if (_logInDiagnosticMode) System.out.println(
                                    "EntityAggregationUtils.updateEntityFeatures, grabbed cached: "
                                            + cachedEnt.toDb());
                        }
                        numCacheHits++;
                    } //TESTED (by hand)
                    else {
                        numCacheMisses++;
                    }

                    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entFeature.getIndex());
                    query.put(EntityFeaturePojo.communityId_, communityID);
                    BasicDBObject updateOp = new BasicDBObject();

                    // Add aliases:
                    BasicDBObject updateOpA = new BasicDBObject();
                    if (null != entFeature.getAlias()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getAlias())
                                || !cachedEnt.getAlias().containsAll(entFeature.getAlias())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, entFeature.getAlias());
                            updateOpA.put(EntityFeaturePojo.alias_, multiopE);
                        } //TESTED (by hand)
                    }
                    // Add link data, if there is any:
                    if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getSemanticLinks())
                                || !cachedEnt.getSemanticLinks().containsAll(entFeature.getSemanticLinks())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopF = new BasicDBObject(MongoDbManager.each_,
                                    entFeature.getSemanticLinks());
                            updateOpA.put(EntityFeaturePojo.linkdata_, multiopF);
                        } //TESTED (by hand)
                    }
                    // OK - now we can copy across the fields into the cache:
                    if (null != cachedEnt) {
                        currCache.updateCachedEntityFeatureStatistics(cachedEnt, entFeature);
                        //(entFeature is now fully up to date)
                    } //TESTED (by hand)
                    if (!updateOpA.isEmpty()) {
                        updateOp.put(MongoDbManager.addToSet_, updateOpA);
                    }
                    // Update frequency:
                    BasicDBObject updateOpB = new BasicDBObject();
                    updateOpB.put(EntityFeaturePojo.totalfreq_, nSavedFreqCount);
                    updateOpB.put(EntityFeaturePojo.doccount_, nSavedDocCount);
                    updateOp.put(MongoDbManager.inc_, updateOpB);

                    // Try to use find/modify to see if something comes back and set doc freq/totalfreq
                    BasicDBObject fields = new BasicDBObject(EntityFeaturePojo.totalfreq_, 1);
                    fields.put(EntityFeaturePojo.doccount_, 1);
                    fields.put(EntityFeaturePojo.alias_, 1);
                    fields.put(EntityFeaturePojo.linkdata_, 1);
                    //(slightly annoying, since only want these 2 largish fields if updating freq but won't know
                    // until after I've got this object)
                    fields.put(EntityFeaturePojo.db_sync_time_, 1);
                    fields.put(EntityFeaturePojo.db_sync_doccount_, 1);

                    DBObject dboUpdate = null;
                    if (_diagnosticMode) {
                        if (null == cachedEnt) {
                            dboUpdate = col.findOne(query, fields);
                        }
                    } else {
                        if (null != cachedEnt) {
                            col.update(query, updateOp, false, false);
                        } else { // Not cached - so have to grab the feature we're either getting or creating
                            dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                    false, true);
                            // (can use findAndModify because specify index, ie the shard key)
                            // (returns entity before the changes above, update the feature object below)
                            // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                        }
                    }
                    if ((null != cachedEnt) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) {
                        // (feature already exists)
                        EntityFeaturePojo gp = cachedEnt;
                        // (Update the entity feature to be correct so that it can be accurately synchronized with the index)
                        if (null == gp) {
                            gp = EntityFeaturePojo.fromDb(dboUpdate, EntityFeaturePojo.class);
                            entFeature.setTotalfreq(gp.getTotalfreq() + nSavedFreqCount);
                            entFeature.setDoccount(gp.getDoccount() + nSavedDocCount);
                            entFeature.setDbSyncDoccount(gp.getDbSyncDoccount());
                            entFeature.setDbSyncTime(gp.getDbSyncTime());
                            if (null != gp.getAlias()) {
                                entFeature.addAllAlias(gp.getAlias());
                            }
                            if (null != gp.getSemanticLinks()) {
                                entFeature.addToSemanticLinks(gp.getSemanticLinks());
                            }
                        } //TESTED (cached case and non-cached case)
                        // (in the cached case, entFeature has already been updated by updateCachedEntityFeatureStatistics)
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode) System.out.println(
                                    "EntityAggregationUtils.updateEntityFeatures, found: "
                                            + ((BasicDBObject) gp.toDb()).toString());
                            if (_logInDiagnosticMode) System.out.println(
                                    "EntityAggregationUtils.updateEntityFeatures, ^^^ found from query: "
                                            + query.toString() + " / " + updateOp.toString());
                        }
                        // (In background aggregation mode we update db_sync_prio when checking the doc update schedule)
                    } else {
                        // (the object in memory is now an accurate representation of the database,
                        //  minus some fields we'll now add)
                        numNewEntities++;
                        // Synchronization settings for the newly created object
                        if (null == savedSyncTime) {
                            savedSyncTime = Long.toString(System.currentTimeMillis());
                        }
                        entFeature.setDbSyncDoccount(nSavedDocCount);
                        entFeature.setDbSyncTime(savedSyncTime);
                        // This is all "distributed safe" (apart from the db_sync_xxx, and it doesn't matter if that
                        // is out of date, the update will just be slightly out-of-date at worst) since (otherwise)
                        // these fields are only set here, and the findAndModify is atomic
                        // (Do in raw MongoDB for performance)
                        BasicDBObject baseFields = new BasicDBObject();
                        baseFields.put(EntityFeaturePojo.dimension_, entFeature.getDimension().toString());
                        baseFields.put(EntityFeaturePojo.type_, entFeature.getType());
                        baseFields.put(EntityFeaturePojo.disambiguated_name_, entFeature.getDisambiguatedName());
                        baseFields.put(EntityFeaturePojo.db_sync_doccount_, entFeature.getDbSyncDoccount());
                        baseFields.put(EntityFeaturePojo.db_sync_prio_, 1000.0);
                        baseFields.put(EntityFeaturePojo.db_sync_time_, entFeature.getDbSyncTime());
                        if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                            baseFields.put(EntityFeaturePojo.linkdata_, entFeature.getSemanticLinks());
                        }
                        // Attempt to add geotag (makes necessary checks on util side)
                        // also add ontology type if geotag is found
                        EntityGeotagAggregationUtils.addEntityGeo(entFeature);
                        if (entFeature.getGeotag() != null) {
                            BasicDBObject geo = new BasicDBObject(GeoPojo.lat_, entFeature.getGeotag().lat);
                            geo.put(GeoPojo.lon_, entFeature.getGeotag().lon);
                            baseFields.put(EntityFeaturePojo.geotag_, geo);
                            if (entFeature.getOntology_type() != null) {
                                baseFields.put(EntityFeaturePojo.ontology_type_, entFeature.getOntology_type());
                            }
                        }
                        if (!_diagnosticMode) {
                            // Store the object
                            col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                        } else {
                            if (_logInDiagnosticMode) System.out.println(
                                    "EntityAggregationUtils.updateEntityFeatures, not found: "
                                            + query.toString() + ": " + baseFields.toString());
                        }
                    } //(end first time this feature seen - globally)

                    if (null == cachedEnt) { // First time we've seen this locally, so add to cache
                        currCache.addCachedEntityFeature(entFeature);
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode) System.out.println(
                                    "EntityAggregationUtils.updateEntityFeatures, added to cache: "
                                            + entFeature.toDb());
                        }
                    } //TESTED (by hand)
                } //(end if community id assigned)
            } catch (Exception e) {
                // Exception, remove from feature list
                it.remove();
                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }
        } // (end loop over communities)
    } // (end loop over indexes)

    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some ents were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getEntityCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" ent_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_ents=").append(entFeatures.size()).append(" new_ents=").append(numNewEntities)
                .append(" cache_misses=").append(numCacheMisses).append(" cache_hits=").append(numCacheHits)
                .append(" cache_size=").append(cacheSize);
        logger.info(logMsg.toString());
    }
}
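The example above builds a single update document that combines $addToSet (with $each) for the list-valued fields and $inc for the counters. A stripped-down sketch of just that operator combination, with literal operator names in place of the project's MongoDbManager.*_ constants and hypothetical field names and values:

import java.util.Arrays;
import org.bson.types.ObjectId;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;

public class AddToSetIncSketch {
    // col: the feature collection; communityId: the community being updated (both assumed to exist)
    static void bumpEntityFeature(DBCollection col, ObjectId communityId) {
        BasicDBObject query = new BasicDBObject("index", "acme/company").append("communityId", communityId);

        // $addToSet + $each appends only the aliases that are not already present in the array
        BasicDBObject addToSet = new BasicDBObject("alias",
                new BasicDBObject("$each", Arrays.asList("ACME", "Acme Corp")));
        // $inc bumps the running totals atomically
        BasicDBObject inc = new BasicDBObject("totalfreq", 5L).append("doccount", 1L);

        BasicDBObject updateOp = new BasicDBObject("$addToSet", addToSet).append("$inc", inc);
        col.update(query, updateOp, false, false); // upsert=false, multi=false: one existing feature document
    }
}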
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
public static void updateMatchingEntities(EntityFeaturePojo entFeature, ObjectId communityId) {
    String index = entFeature.getIndex();
    long totalFreq = entFeature.getTotalfreq();
    long docCount = entFeature.getDoccount();
    try {
        DBCollection docDb = DbManager.getDocument().getMetadata();

        BasicDBObject query1 = new BasicDBObject();
        query1.put(EntityPojo.docQuery_index_, index);
        query1.put(DocumentPojo.communityId_, communityId);

        BasicDBObject multiopB = new BasicDBObject();
        multiopB.put(EntityPojo.docUpdate_totalfrequency_, totalFreq);
        multiopB.put(EntityPojo.docUpdate_doccount_, docCount);
        BasicDBObject multiopA = new BasicDBObject(MongoDbManager.set_, multiopB);

        if (_diagnosticMode) {
            if (_logInDiagnosticMode) System.out.println("EntityAggregationUtils.updateMatchingEntities: "
                    + query1.toString() + " / " + multiopA.toString());
        } else {
            synchronized (GenericProcessingController.class) {
                // Because this op can be slow, and traverse a lot of disk, need to ensure that
                // we don't allow all the threads to hammer it at once (the updates all yield to each other
                // enough that the disk goes totally crazy)
                docDb.update(query1, multiopA, false, true);
                DbManager.getDocument().getLastError(DbManager.getDocument().getMetadata().getName());
                // (enforce consecutive accesses for this potentially very slow operation)
            }
            // We originally checked updatedExisting here, but for INF-1406 it sometimes seemed to be
            // checking the wrong command. I suspect the reason we had this code in here has gone away,
            // and it doesn't matter if this update occasionally fails anyway (it will just be out of date),
            // so the check/retry has been removed.
        }
    } catch (Exception ex) {
        logger.error(ex.getMessage(), ex);
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
public static void synchronizeEntityFeature(EntityFeaturePojo entityFeature, ObjectId communityId) {
    DBCollection entityFeatureDb = DbManager.getFeature().getEntity();
    // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" the entity
    if (_diagnosticMode || (null != entityFeature.getDbSyncTime()) || (null != entityFeature.getDb_sync_prio())) {
        // Else this is a new feature so don't need to update the feature DB, only the index
        // (if db_sync_prio is null then have to update to avoid the b/g aggregation loop)
        // (note that db_sync_prio will in practice not be set when this is a new feature because it will
        //  have the same sync_doccount as doc_count)
        long nCurrTime = System.currentTimeMillis();
        //(query from top of the function, basically lookup on gaz_index)
        BasicDBObject update2 = new BasicDBObject();
        update2.put(EntityFeaturePojo.db_sync_time_, Long.toString(nCurrTime));
        update2.put(EntityFeaturePojo.db_sync_doccount_, entityFeature.getDoccount());
        BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2);
        BasicDBObject update3 = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1);
        update.put(MongoDbManager.unset_, update3);
        BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex());
        query.put(EntityFeaturePojo.communityId_, communityId);

        if (_diagnosticMode) {
            if ((null != entityFeature.getDbSyncTime()) || (null != entityFeature.getDb_sync_prio())) {
                if (_logInDiagnosticMode) System.out.println(
                        "EntityAggregationUtils.synchronizeEntityFeature, featureDB: "
                                + query.toString() + " / " + update.toString());
            } else {
                if (_logInDiagnosticMode) System.out.println(
                        "(WOULD NOT RUN) EntityAggregationUtils.synchronizeEntityFeature, featureDB: "
                                + query.toString() + " / " + update.toString());
            }
        } else {
            entityFeatureDb.update(query, update, false, true);
        }
    }
    if (_diagnosticMode) {
        if (_logInDiagnosticMode) System.out.println(
                "EntityAggregationUtils.synchronizeEntityFeature, synchronize: "
                        + new StringBuffer(entityFeature.getIndex()).append(':').append(communityId).toString()
                        + " = " + IndexManager.mapToIndex(entityFeature, new EntityFeaturePojoIndexMap()));
    } else {
        ElasticSearchManager esm = IndexManager.getIndex(EntityFeaturePojoIndexMap.indexName_);
        esm.addDocument(entityFeature, new EntityFeaturePojoIndexMap(), null, true);
        //(_id is set by the index map to index:communityId)
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
public static void markEntityFeatureForSync(EntityFeaturePojo entityFeature, ObjectId communityId) {
    DBCollection entityFeatureDb = DbManager.getFeature().getEntity();
    double dPrio = 100.0 * (double) entityFeature.getDoccount()
            / (0.01 + (double) entityFeature.getDbSyncDoccount());
    entityFeature.setDb_sync_prio(dPrio);
    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex());
    query.put(EntityFeaturePojo.communityId_, communityId);
    BasicDBObject update = new BasicDBObject(MongoDbManager.set_,
            new BasicDBObject(EntityFeaturePojo.db_sync_prio_, dPrio));
    if (_diagnosticMode) {
        if (_logInDiagnosticMode) System.out.println(
                "EntityAggregationUtils.markEntityFeatureForSynchronization, featureDB: "
                        + query.toString() + " / " + update.toString());
    } else {
        entityFeatureDb.update(query, update, false, true);
    }
}
From source file:com.ikanow.infinit.e.processing.generic.store_and_index.StoreAndIndexManager.java
License:Open Source License
/**
 * Save the fulltext of a pojo to mongo for use later
 *
 * @param docs
 */
private void saveContent(List<DocumentPojo> docs) {
    try {
        DBCollection contentDb = DbManager.getDocument().getContent();
        for (DocumentPojo doc : docs) {
            boolean bStoreContent = true;
            bStoreContent &= (0 != nMaxContentLen_bytes); // (otherwise it's turned off)
            bStoreContent &= this.bStoreMetadataAsContent
                    || ((null != doc.getFullText()) && !doc.getFullText().isEmpty());
            boolean bDocHasExternalContent = docHasExternalContent(doc.getUrl(), doc.getSourceUrl());
            if (bStoreContent && bDocHasExternalContent) {
                try {
                    String rawText = this.bStoreRawContent ? doc.getRawFullText() : null;
                    DocumentPojo meta = bStoreMetadataAsContent ? doc : null;
                    CompressedFullTextPojo gzippedContent = new CompressedFullTextPojo(doc.getUrl(),
                            doc.getSourceKey(), doc.getCommunityId(), doc.getFullText(), rawText, meta,
                            nMaxContentLen_bytes);
                    if (null != gzippedContent.getUrl()) {
                        // Be efficient and write field-by-field vs using JSON conversion
                        BasicDBObject query = new BasicDBObject(CompressedFullTextPojo.url_,
                                gzippedContent.getUrl());
                        query.put(CompressedFullTextPojo.sourceKey_, gzippedContent.getSourceKey());
                        BasicDBObject update = gzippedContent.getUpdate();
                        if (!_diagnosticMode) {
                            contentDb.update(query, update, true, false);
                            // (ie upsert, supported because query includes shard key==url)
                        } else {
                            System.out.println("StoreAndIndexManager.savedContent, save content: "
                                    + gzippedContent.getUrl());
                        }
                    }
                } catch (Exception ex) {
                    // Do nothing, just carry on
                    ex.printStackTrace();
                }
            } //TESTED
        }
    } catch (Exception ex) {
        // This is a more serious error
        logger.error(ex.getMessage());
    }
}
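The update call above relies on upsert=true with a query on the document's logical key (url plus sourceKey, where url is also the shard key), so the content record is created the first time a document is seen and overwritten in place afterwards. A minimal sketch of that pattern, with hypothetical field names and values:

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;

public class ContentUpsertSketch {
    // contentDb is assumed to be the content collection; url/sourceKey/gzippedBytes are hypothetical values
    static void upsertContent(DBCollection contentDb, String url, String sourceKey, byte[] gzippedBytes) {
        BasicDBObject query = new BasicDBObject("url", url).append("sourceKey", sourceKey);
        BasicDBObject update = new BasicDBObject("$set",
                new BasicDBObject("gzip_content", gzippedBytes).append("len", gzippedBytes.length));

        // upsert=true: insert the document if this url/sourceKey pair has never been stored,
        // otherwise overwrite the stored fields; multi=false since the pair identifies one document
        contentDb.update(query, update, true, false);
    }
}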
From source file:com.ikanow.infinit.e.processing.generic.store_and_index.StoreAndIndexManager.java
License:Open Source License
/**
 * Remove a doc from the data store, ensures all the fields specified in "fields" are populated
 * (ready for index deletion)
 * @param col
 * @param doc - needs url, sourceKey set
 * @param fields - fields to retrieve (index, created), set in calling function outside of loop for performance
 *
 * CALLED FROM: removeFromDatastore_byURL(col, List<doc>, bDeleteContent) <- ADDS INDEX, CREATED TO FIELDS
 *              removeFromDataStore_byURL(List<doc>, bDeleteContent) [ALSO DELETES FROM INDEX AFTER ADDED FROM HERE]
 *              MongoDocumentTxfer.doDelete(...) <- SETS URL, SOURCE URL, SOURCE KEY, COMMUNITY ID, INDEX, _ID
 *              processDocuments(...) [always called after harvester: have sourceUrl, sourceKey,
 *                  DON'T have _id, BUT do have updateId and index (correct except in many geo cases)]
 *              pruneSource(source, ...) <- SETS URL, SOURCE URL, SOURCE KEY, INDEX
 *              updateHarvestStatus(...)
 */
private void removeFromDatastore_byURL(DBCollection col, DocumentPojo doc, BasicDBObject fields,
        boolean bDeleteContent) {

    // 1] Create the query to soft delete the document
    BasicDBObject query = new BasicDBObject();
    query.put(DocumentPojo.url_, doc.getUrl());
    query.put(DocumentPojo.sourceKey_, SourcePojo.getDistributedKeyQueryTerm(doc.getSourceKey()));

    // 2] Delete the content if needed
    if (bDeleteContent) {
        if (docHasExternalContent(doc.getUrl(), doc.getSourceUrl())) {
            if (!_diagnosticMode) {
                DbManager.getDocument().getContent().remove(query);
            } else {
                System.out.println("StoreAndIndexManager.removeFromDatastore_byUrl(2), delete content: "
                        + doc.getSourceKey() + "/" + doc.getUrl());
            }
        }
    } //TESTED

    // 3] Work out which fields we have and which (if any) we need to go and fetch:
    boolean needToFindAndModify = false;
    if (null == doc.getId()) { // This is called from processDocuments
        if (null != doc.getUpdateId()) { // update case...
            doc.setId(doc.getUpdateId());
            // (note this is overwritten by addToDatastore later, in update case, so we're good)
            // (doc.index is populated but may not be correct because of the "many geos" workaround):
            if (DocumentPojoIndexMap.hasManyGeos(doc)) {
                doc.setIndex(DocumentPojoIndexMap.manyGeoDocumentIndex_);
                // (note this check isn't stateless, it actually populates "locs" at the same time;
                //  this is handled in addToDatastore (update case), temp removed when adding to DB)
            } //TESTED (2.1.2, diagnostic mode, doc2)
        } else {
            // Not an update case, we're going to have to grab the document after all, which is a bit slower
            needToFindAndModify = true;
        }
    } //TESTED (2.1.2, diagnostic mode, doc2)
    if (!needToFindAndModify) {
        // set created if we need to, since we're not grabbing it from the datastore
        if (null != doc.getUpdateId()) {
            // (this means we have an approx created if we don't need to go fetch the deleted doc)
            doc.setCreated(new Date(doc.getUpdateId().getTime()));
        } //TESTED (2.1.2, diagnostic mode, doc2)
    }
    // (if we're here and index is not set, then it is intended to be null)

    // 4] Update the doc_metadata collection
    BasicDBObject softDelete = getSoftDeleteUpdate();
    BasicDBObject deadDoc = null; // (not normally needed)
    if (needToFindAndModify) { // less pleasant, need to go grab the doc
        deadDoc = (BasicDBObject) col.findOne(query, fields);
    } //TESTED (2.1.2)
    if (!_diagnosticMode) {
        col.update(query, softDelete, false, true);
        // (needs to be multi- even though there's a single element, for sharding reasons)
    } //TESTED (2.1.2)

    // 5] Add fields if necessary
    if (null != deadDoc) {
        doc.setCreated((Date) deadDoc.get(DocumentPojo.created_));
        // (if getting this doc anyway then might as well get the created)
        doc.setId((ObjectId) deadDoc.get(DocumentPojo._id_));
        doc.setIndex((String) deadDoc.get(DocumentPojo.index_));
        if (_diagnosticMode) {
            System.out.println("StoreAndIndexManager.removeFromDatastore_byUrl(2): found " + deadDoc.toString());
        }
    } //TESTED (2.1.2)
    else if (_diagnosticMode) {
        if (!needToFindAndModify) {
            System.out.println("StoreAndIndexManager.removeFromDatastore_byUrl(2): straight deleted "
                    + doc.toDb().toString());
        } else {
            System.out.println("StoreAndIndexManager.removeFromDatastore_byUrl(2): didn't find "
                    + query.toString());
        }
    } //TESTED (2.1.2)
}
From source file:com.ikanow.infinit.e.utility.MongoAssociationFeatureTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) {
    ElasticSearchManager elasticManager = null;

    // Initialize the DB:
    DBCollection eventFeatureDB = DbManager.getFeature().getAssociation();

    // Initialize the ES (create the index if it doesn't already exist):
    // 1. Set-up the entity feature index
    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // (delete the index)
    //elasticManager = ElasticSearchManager.getIndex("association_index");
    //elasticManager.deleteMe();

    // Create the index if necessary
    String sMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(),
            AssociationFeaturePojoIndexMap.Mapping.class);
    Builder localSettings = ImmutableSettings.settingsBuilder();
    localSettings.put("number_of_shards", 1).put("number_of_replicas", 0);
    localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
    localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");
    elasticManager = ElasticSearchManager.createIndex("association_index", null, false, null, sMapping,
            localSettings);

    // Get the index (necessary if already created)
    if (null == elasticManager) {
        elasticManager = ElasticSearchManager.getIndex("association_index");
    }

    // Now query the DB:
    DBCursor dbc = null;
    dbc = eventFeatureDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println("Found " + nCount + " records to sync, process first "
                + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    List<AssociationFeaturePojo> events = new LinkedList<AssociationFeaturePojo>();
    int nSynced = 0;

    // Loop over array and invoke the cleansing function for each one
    while (dbc.hasNext()) {
        BasicDBObject dbo = (BasicDBObject) dbc.next();
        AssociationFeaturePojo evt = AssociationFeaturePojo.fromDb(dbo, AssociationFeaturePojo.class);

        // If this table has just been rebuilt from the document then the indexes are all wrong ...
        // recalculate and save
        if ('#' == evt.getIndex().charAt(0)) {
            AssociationPojo singleEvt = new AssociationPojo();
            singleEvt.setEntity1_index(evt.getEntity1_index());
            singleEvt.setEntity2_index(evt.getEntity2_index());
            singleEvt.setVerb_category(evt.getVerb_category());
            singleEvt.setGeo_index(evt.getGeo_index());
            evt.setIndex(AssociationAggregationUtils.getEventFeatureIndex(singleEvt));
            eventFeatureDB.update(new BasicDBObject("_id", dbo.get("_id")),
                    new BasicDBObject(MongoDbManager.set_,
                            new BasicDBObject(AssociationFeaturePojo.index_, evt.getIndex())),
                    false, true);
            // (has to be a multi-update even though it's unique because it's sharded on index)
        }

        // Handle groups (system group is: "4c927585d591d31d7b37097a")
        if (null == evt.getCommunityId()) {
            evt.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));
        }

        // Bulk add prep
        events.add(evt);
        nSynced++;

        if (events.size() > 1000) {
            elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events,
                    AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null, true);
            events.clear();
        }
    } // End loop over entities

    // Write whatever's left
    elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(),
            new AssociationFeaturePojoIndexMap()), "_id", null, true);

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }
}
From source file:com.imaginea.mongodb.services.DocumentServiceImpl.java
License:Apache License
/**
 * Updates a document inside a collection in a database in mongo to which the user is connected.
 *
 * @param dbName Name of Database
 * @param collectionName Name of Collection from which to get all Documents
 * @param id Id of Document to be updated
 * @param newData new Document value.
 * @return Update status
 * @exception EmptyDatabaseNameException If database name is null
 * @exception EmptyCollectionNameException If Collection name is null
 * @exception EmptyDocumentDataException If Document data is null
 * @exception UndefinedDatabaseException If database is not present
 * @exception UndefinedCollectionException If Collection is not present
 * @exception UpdateDocumentException Any exception while updating document
 * @exception DatabaseException throw super type of UndefinedDatabaseException
 * @exception ValidationException throw super type of EmptyDatabaseNameException,
 *            EmptyCollectionNameException, EmptyDocumentDataException
 * @exception CollectionException throw super type of UndefinedCollectionException
 * @exception DocumentException throw super type of UpdateDocumentException
 * @exception JSONException
 */
public String updateDocument(String dbName, String collectionName, ObjectId id, DBObject newData)
        throws DatabaseException, CollectionException, DocumentException, ValidationException {
    mongoInstance = mongoInstanceProvider.getMongoInstance();
    if (dbName == null) {
        throw new EmptyDatabaseNameException("Database name is null");
    }
    if (dbName.equals("")) {
        throw new EmptyDatabaseNameException("Database Name Empty");
    }
    if (collectionName == null) {
        throw new EmptyCollectionNameException("Collection name is null");
    }
    if (collectionName.equals("")) {
        throw new EmptyCollectionNameException("Collection Name Empty");
    }
    String result = null;
    DBObject documentData = null;
    try {
        if (!mongoInstance.getDatabaseNames().contains(dbName)) {
            throw new UndefinedDatabaseException("DB [" + dbName + "] DOES NOT EXIST");
        }
        if (!mongoInstance.getDB(dbName).getCollectionNames().contains(collectionName)) {
            throw new UndefinedCollectionException(
                    "COLLECTION [ " + collectionName + "] _DOES_NOT_EXIST in Db [ " + dbName + "]");
        }
        if (id == null) {
            throw new EmptyDocumentDataException("Document is empty");
        }
        String temp = (String) newData.get("_id");
        if (temp == null) {
            throw new DocumentException(ErrorCodes.INVALID_OBJECT_ID, "INVALID_OBJECT_ID");
        }
        if (temp.equals("")) {
            throw new DocumentException(ErrorCodes.INVALID_OBJECT_ID, "INVALID_OBJECT_ID");
        }
        ObjectId newId = new ObjectId(temp);
        if (!newId.equals(id)) {
            throw new DocumentException(ErrorCodes.INVALID_OBJECT_ID, "Cannot Change Object Id of a document");
        } else {
            // Ids are equal, but newData carries the id as a String; put the old document's
            // ObjectId back so the stored _id keeps its proper type
            newData.put("_id", id);
        }
        DBObject query = new BasicDBObject("_id", id);
        DBCollection collection = mongoInstance.getDB(dbName).getCollection(collectionName);
        DBObject doc = collection.findOne(query);
        if (doc == null) {
            throw new UndefinedDocumentException("DOCUMENT_DOES_NOT_EXIST");
        }
        collection.update(doc, newData, true, false);
        documentData = collection.findOne(query);
    } catch (IllegalArgumentException e) {
        // When error converting object Id
        throw new DocumentException(ErrorCodes.INVALID_OBJECT_ID, "INVALID_OBJECT_ID");
    } catch (MongoException e) {
        throw new UpdateDocumentException("DOCUMENT_UPDATE_EXCEPTION");
    }
    result = "Updated Document: [" + documentData + "]";
    return result;
}
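Note that newData above contains no update operators, so collection.update performs a whole-document replacement rather than a field-level modification. A brief sketch of that replacement form (the collection, id and fields are hypothetical); replacement-style updates must target a single document, so multi is false:

import org.bson.types.ObjectId;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;

public class ReplaceDocumentSketch {
    // collection and id are assumed to exist; mirrors the whole-document replacement above
    static void replaceById(DBCollection collection, ObjectId id) {
        DBObject query = new BasicDBObject("_id", id);
        // No update operators, so the matched document is replaced wholesale (the _id is preserved)
        DBObject replacement = new BasicDBObject("_id", id).append("name", "new name").append("count", 42);

        // upsert=true would insert the replacement if nothing matched; multi must be false for replacements
        collection.update(query, replacement, true, false);
    }
}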
From source file:com.liferay.mongodb.hook.listeners.ExpandoColumnListener.java
License:Open Source License
protected void doOnAfterRemove(ExpandoColumn expandoColumn) throws Exception {
    ExpandoTable expandoTable = ExpandoTableLocalServiceUtil.getTable(expandoColumn.getTableId());
    DBCollection dbCollection = MongoDBUtil.getCollection(expandoTable);
    for (DBObject indexDBObject : dbCollection.getIndexInfo()) {
        DBObject keyDBObject = (DBObject) indexDBObject.get("key");
        if (keyDBObject.containsField(expandoColumn.getName())) {
            dbCollection.dropIndex(keyDBObject);
        }
    }
    DBObject operatorDBObject = new BasicDBObject(MongoOperator.UNSET,
            new BasicDBObject(expandoColumn.getName(), 1));
    dbCollection.update(new BasicDBObject(), operatorDBObject, false, true);
}
From source file:com.liferay.mongodb.hook.service.impl.MongoExpandoValueLocalServiceImpl.java
License:Open Source License
@Override
public void deleteColumnValues(long columnId) {
    try {
        ExpandoColumn expandoColumn = ExpandoColumnUtil.fetchByPrimaryKey(columnId);
        if (expandoColumn == null) {
            return;
        }
        ExpandoTable expandoTable = ExpandoTableLocalServiceUtil.getTable(expandoColumn.getTableId());
        DBCollection dbCollection = MongoDBUtil.getCollection(expandoTable);
        DBObject operatorDBObject = new BasicDBObject(MongoOperator.SET,
                new BasicDBObject(expandoColumn.getName(), null));
        dbCollection.update(new BasicDBObject(), operatorDBObject, false, true);
    } catch (PortalException pe) {
        throw new SystemException(pe);
    }
}