List of usage examples for com.mongodb BasicDBObject toString
@SuppressWarnings("deprecation") public String toString()
Returns a JSON serialization of this object
The output will look like: {"a":1, "b":["x","y","z"]}
From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteHadoopUtils.java
License:Open Source License
/**
 * Checks a list of share ids against the share collection and fails if the
 * check query returns any document.
 *
 * The query selects share documents whose {@code SharePojo._id_} is one of the
 * supplied ids and whose {@code ShareCommunityPojo.shareQuery_id_} field satisfies
 * {@code DbManager.ne_} against {@code {DbManager.in_: ids}}. Every document the
 * query returns is treated as an authentication failure and is listed in the
 * exception message.
 * NOTE(review): presumably the second clause is meant to select shares outside
 * the permitted communities - confirm against the share schema.
 *
 * @param cmr  the job the shares belong to (not read by this method)
 * @param path share ids as strings; each must be a valid {@code ObjectId}
 * @throws RuntimeException if the query returns at least one document
 */
public static void authenticateShareList(CustomMapReduceJobPojo cmr, String[] path) {
    ArrayList<ObjectId> list = new ArrayList<ObjectId>(path.length);
    for (String s : path) {
        list.add(new ObjectId(s));
    }
    BasicDBObject query = new BasicDBObject(SharePojo._id_, new BasicDBObject(DbManager.in_, list));
    query.put(ShareCommunityPojo.shareQuery_id_,
            new BasicDBObject(DbManager.ne_, new BasicDBObject(DbManager.in_, list)));
    BasicDBObject fields = new BasicDBObject(ShareCommunityPojo.shareQuery_id_, 1);
    DBCursor dbc = DbManager.getSocial().getShare().find(query, fields);

    StringBuilder sb = new StringBuilder();
    for (Object o : dbc) {
        BasicDBObject dbo = (BasicDBObject) o;
        if (0 != sb.length()) {
            sb.append(", ");
        }
        // The append must sit outside the separator check: when it was inside,
        // the buffer could never become non-empty and the error below was unreachable.
        sb.append(dbo.toString());
    }
    if (sb.length() > 0) {
        throw new RuntimeException("Share authentication error: " + sb.toString());
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.AggregationManager.java
License:Open Source License
public static void updateDocEntitiesFromDeletedDocuments(String uuid) { String outCollection = new StringBuilder(uuid).append("_AggregationUtils").toString(); try {//from w ww. ja v a 2 s . c om PropertiesManager props = new PropertiesManager(); if (props.getAggregationDisabled()) { // (no need to do this) return; } DBCollection outColl = DbManager.getDB("doc_metadata").getCollection(outCollection); DBCursor dbc = outColl.find(); for (DBObject dbo : dbc) { BasicDBObject entityEl = (BasicDBObject) dbo; BasicDBObject entityVal = (BasicDBObject) entityEl.get("value"); long nDocDecrement = entityVal.getLong("dc"); long nFreqDecrement = entityVal.getLong("f"); long nCurrFreq = entityVal.getLong("tf"); long nCurrDocCount = entityVal.getLong("tdc"); // (These are by construction the lowest values so this will provide some defence against going -ve) if (nDocDecrement > nCurrDocCount) { nDocDecrement = nCurrDocCount; } if (nFreqDecrement > nCurrFreq) { nFreqDecrement = nCurrFreq; } BasicDBObject entityId = (BasicDBObject) entityEl.get("_id"); ObjectId commId = null; Object commObj = entityId.get("comm"); if (commObj instanceof ObjectId) { commId = entityId.getObjectId("comm"); } String index = (String) entityId.get("index"); if ((null == index) || (null == commId)) { continue; // random error } BasicDBObject updateQuery = new BasicDBObject(EntityFeaturePojo.index_, index); updateQuery.put(EntityFeaturePojo.communityId_, commId); BasicDBObject entityUpdate1 = new BasicDBObject(EntityFeaturePojo.doccount_, -nDocDecrement); entityUpdate1.put(EntityFeaturePojo.totalfreq_, -nFreqDecrement); BasicDBObject entityUpdate = new BasicDBObject(DbManager.inc_, entityUpdate1); if (_diagnosticMode) { if (_logInDiagnosticMode) System.out.println("UPDATE FEATURE DATABASE: " + updateQuery.toString() + "/" + entityUpdate.toString()); } else { DbManager.getFeature().getEntity().update(updateQuery, entityUpdate); // (can be a single query because the query is on index, the shard) } //TESTED if 
((nDocDecrement < nCurrDocCount) && (nDocDecrement * 10 > nCurrDocCount)) { // ie there are some documents left // and the doc count has shifted by more than 10% BasicDBObject updateQuery2 = new BasicDBObject(EntityPojo.docQuery_index_, index); updateQuery2.put(DocumentPojo.communityId_, commId); BasicDBObject entityUpdate2_1 = new BasicDBObject(EntityPojo.docUpdate_doccount_, nCurrDocCount - nDocDecrement); entityUpdate2_1.put(EntityPojo.docUpdate_totalfrequency_, nCurrFreq - nFreqDecrement); BasicDBObject entityUpdate2 = new BasicDBObject(DbManager.set_, entityUpdate2_1); if (_diagnosticMode) { if (_logInDiagnosticMode) System.out.println("UPDATE DOC DATABASE: " + updateQuery2.toString() + "/" + entityUpdate2.toString()); } else { DbManager.getDocument().getMetadata().update(updateQuery2, entityUpdate2, false, true); } } } //TESTED (including when to update logic above) } catch (Exception e) { e.printStackTrace(); } // Tidy up DbManager.getDB("doc_metadata").getCollection(outCollection).drop(); }
From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java
License:Open Source License
/**
 * Updates the association ("event") feature collection in MongoDB, and the
 * per-community in-memory feature cache, for a batch of association features,
 * so they are searchable for searchsuggest.
 *
 * For each feature:
 * Step 1.a, try to just update the alias arrays (entity1/entity2/verb) and
 *           increment doccount on the existing record
 * Step 1.b, if no record existed, write the base fields of the new entry
 * Step 2,   bring the in-memory feature up to date (doccount, sync fields, aliases)
 *
 * This method itself does not write to the search index; in this file that is
 * done by synchronizeEventFeature.
 *
 * Features whose processing throws are removed from the supplied map and the
 * error is logged. In diagnostic mode nothing is written to the database
 * (only findOne is issued) and the would-be operations are optionally printed.
 *
 * @param eventFeatures features to aggregate; only the values of the outer map are
 *        read. The inner map key is used as the community id for the cache lookup.
 */
public static void updateEventFeatures(Map<String, Map<ObjectId, AssociationFeaturePojo>> eventFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewAssocs = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getAssociation();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    // Sync timestamp shared by every association created during this call (lazily initialised)
    String savedSyncTime = null;
    for (Map<ObjectId, AssociationFeaturePojo> evtCommunity : eventFeatures.values()) {

        // Explicit iterator so that failing entries can be removed from the map in the catch block
        Iterator<Map.Entry<ObjectId, AssociationFeaturePojo>> it = evtCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, AssociationFeaturePojo> evtFeatureKV = it.next();
            try {
                AssociationFeaturePojo evtFeature = evtFeatureKV.getValue();
                // Saved now because evtFeature's doccount is overwritten further down
                long nSavedDocCount = evtFeature.getDoccount();

                ObjectId communityID = evtFeature.getCommunityId();

                // Only (re)open the cache when the community changes
                if ((null == currCache) || !currCache.getCommunityId().equals(evtFeatureKV.getKey())) {
                    currCache = CommunityFeatureCaches.getCommunityFeatureCache(evtFeatureKV.getKey());
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println(
                                    "AssociationAggregationUtils.updateEventFeatures, Opened cache for community: "
                                            + evtFeatureKV.getKey());
                    }
                } //TESTED (by hand)

                // Is this in our cache? If so can short cut a bunch of the DB interaction:
                AssociationFeaturePojo cachedAssoc = currCache.getCachedAssocFeature(evtFeature);
                if (null != cachedAssoc) {
                    if (_incrementalMode) {
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "AssociationAggregationUtils.updateEventFeatures, skip cached: "
                                                + cachedAssoc.toDb());
                            //TODO (INF-2825): should be continue-ing here so can use delta more efficiently...
                        }
                    } else if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out
                                    .println("AssociationAggregationUtils.updateEventFeatures, grabbed cached: "
                                            + cachedAssoc.toDb());
                    }
                    numCacheHits++;
                } //TESTED (by hand)
                else {
                    numCacheMisses++;
                }

                //try to update
                BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, evtFeature.getIndex());
                query.put(AssociationFeaturePojo.communityId_, communityID);

                //Step1 try to update alias
                //update arrays
                // (each array is only sent if the cached copy does not already contain all its values)
                BasicDBObject multiopAliasArrays = new BasicDBObject();
                // Entity1 Alias:
                if (null != evtFeature.getEntity1_index()) {
                    evtFeature.addEntity1(evtFeature.getEntity1_index());
                }
                if (null != evtFeature.getEntity1()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity1())
                            || !cachedAssoc.getEntity1().containsAll(evtFeature.getEntity1())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getEntity1());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity1_, multiopE);
                    }
                } //TESTED (by hand)

                // Entity2 Alias:
                if (null != evtFeature.getEntity2_index()) {
                    evtFeature.addEntity2(evtFeature.getEntity2_index());
                }
                if (null != evtFeature.getEntity2()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity2())
                            || !cachedAssoc.getEntity2().containsAll(evtFeature.getEntity2())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getEntity2());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity2_, multiopE);
                    }
                } //TESTED (by hand)

                // verb/verb cat alias:
                if (null != evtFeature.getVerb_category()) {
                    evtFeature.addVerb(evtFeature.getVerb_category());
                }
                if (null != evtFeature.getVerb()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getVerb())
                            || !cachedAssoc.getVerb().containsAll(evtFeature.getVerb())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getVerb());
                        multiopAliasArrays.put(AssociationFeaturePojo.verb_, multiopE);
                    }
                } //TESTED (by hand)

                // OK - now we can copy across the fields into the cache:
                // (must happen after the containsAll checks above, which compare against the old cached values)
                if (null != cachedAssoc) {
                    currCache.updateCachedAssocFeatureStatistics(cachedAssoc, evtFeature); //(evtFeature is now fully up to date)
                } //TESTED (by hand)

                BasicDBObject updateOp = new BasicDBObject();
                if (!multiopAliasArrays.isEmpty()) {
                    updateOp.put(MongoDbManager.addToSet_, multiopAliasArrays);
                }
                // Document count for this event
                BasicDBObject updateFreqDocCount = new BasicDBObject(AssociationFeaturePojo.doccount_,
                        nSavedDocCount);
                updateOp.put(MongoDbManager.inc_, updateFreqDocCount);

                // Fields to be returned by the find / findAndModify below
                BasicDBObject fields = new BasicDBObject(AssociationFeaturePojo.doccount_, 1);
                fields.put(AssociationFeaturePojo.entity1_, 1);
                fields.put(AssociationFeaturePojo.entity2_, 1);
                fields.put(AssociationFeaturePojo.verb_, 1);
                //(slightly annoying, since only want these if updating dc but won't know
                // until after i've got this object)

                fields.put(AssociationFeaturePojo.db_sync_time_, 1);
                fields.put(AssociationFeaturePojo.db_sync_doccount_, 1);

                DBObject dboUpdate = null;
                if (_diagnosticMode) {
                    // Read-only in diagnostic mode
                    if (null == cachedAssoc) {
                        dboUpdate = col.findOne(query, fields);
                    }
                } else {
                    if (null != cachedAssoc) {
                        col.update(query, updateOp, false, false);
                    } else { // Not cached - so have to grab the feature we're either getting or creating
                        dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp, false,
                                true);
                        // (can use findAndModify because specify index, ie the shard key)
                        // (returns event before the changes above, update the feature object below)
                        // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                    }
                }
                if ((null != cachedAssoc) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                {
                    AssociationFeaturePojo egp = cachedAssoc;

                    if (null == egp) {
                        // Not cached: merge the pre-update database state into the in-memory feature
                        egp = AssociationFeaturePojo.fromDb(dboUpdate, AssociationFeaturePojo.class);
                        evtFeature.setDoccount(egp.getDoccount() + nSavedDocCount);
                        evtFeature.setDb_sync_doccount(egp.getDb_sync_doccount());
                        evtFeature.setDb_sync_time(egp.getDb_sync_time());
                        if (null != egp.getEntity1()) {
                            for (String ent : egp.getEntity1())
                                evtFeature.addEntity1(ent);
                        }
                        if (null != egp.getEntity2()) {
                            for (String ent : egp.getEntity2())
                                evtFeature.addEntity2(ent);
                        }
                        if (null != egp.getVerb()) {
                            for (String verb : egp.getVerb())
                                evtFeature.addVerb(verb);
                        }
                    } //TESTED (cached and non-cached cases)
                    // (in the cached case, evtFeature has already been updated by updateCachedAssocFeatureStatistics)

                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, found: "
                                    + ((BasicDBObject) egp.toDb()).toString());
                        if (_logInDiagnosticMode)
                            System.out.println(
                                    "AssociationAggregationUtils.updateEventFeatures, ^^^ found from query: "
                                            + query.toString() + " / " + updateOp.toString());
                    }
                    // (In background aggregation mode we update db_sync_prio when checking the -otherwise unused, unlike entities- document update schedule)
                } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                {
                    numNewAssocs++;

                    // Synchronization settings for the newly created object
                    evtFeature.setDb_sync_doccount(nSavedDocCount);
                    if (null == savedSyncTime) {
                        savedSyncTime = Long.toString(System.currentTimeMillis());
                    }
                    evtFeature.setDb_sync_time(savedSyncTime);

                    // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is
                    // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are
                    // only set here, and the findAndModify is atomic

                    BasicDBObject baseFields = new BasicDBObject();
                    if (null != evtFeature.getEntity1_index()) {
                        baseFields.put(AssociationFeaturePojo.entity1_index_, evtFeature.getEntity1_index());
                    }
                    if (null != evtFeature.getEntity2_index()) {
                        baseFields.put(AssociationFeaturePojo.entity2_index_, evtFeature.getEntity2_index());
                    }
                    if (null != evtFeature.getVerb_category()) {
                        baseFields.put(AssociationFeaturePojo.verb_category_, evtFeature.getVerb_category());
                    }
                    baseFields.put(AssociationFeaturePojo.assoc_type_, evtFeature.getAssociation_type());
                    baseFields.put(AssociationFeaturePojo.db_sync_doccount_, evtFeature.getDb_sync_doccount());
                    baseFields.put(AssociationFeaturePojo.db_sync_time_, evtFeature.getDb_sync_time());
                    baseFields.put(AssociationFeaturePojo.db_sync_prio_, 1000.0); // (ensures new objects are quickly index-synchronized)

                    if (!_diagnosticMode) {
                        // Store the object
                        col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                    } else {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, not found: "
                                    + query.toString() + " / " + baseFields.toString() + "/ orig_update= "
                                    + updateOp.toString());
                    }

                    // (Note even in background aggregation mode we still perform the feature synchronization
                    //  for new entities - and it has to be right at the end because it "corrupts" the objects)

                } //(end if first time seen)

                if (null == cachedAssoc) { // First time we've seen this locally, so add to cache
                    currCache.addCachedAssocFeature(evtFeature);
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out
                                    .println("AssociationAggregationUtils.updateEventFeatures, added to cache: "
                                            + evtFeature.toDb());
                    }
                } //TESTED (by hand)
            } catch (Exception e) {
                // Exception, remove from feature list
                // (this mutates the map passed in by the caller)
                it.remove();

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }

        } // (end loop over all communities for the set of features sharing and index)
    } // (end loop over indexes)

    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some assocs were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getAssocCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" assoc_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_assocs=").append(eventFeatures.size()).append(" new_assocs=")
                .append(numNewAssocs).append(" cache_misses=").append(numCacheMisses).append(" cache_hits=")
                .append(numCacheHits).append(" cache_size=").append(cacheSize);

        logger.info(logMsg.toString());
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java
License:Open Source License
public static void synchronizeEventFeature(AssociationFeaturePojo eventFeature, ObjectId communityId) { DBCollection eventFeatureDb = DbManager.getFeature().getAssociation(); // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" the event if (_diagnosticMode || (null != eventFeature.getDb_sync_time()) || (null != eventFeature.getDb_sync_prio())) { // Else this is a new feature so don't need to update the feature DB, only the index (if db_sync_prio null then have to update to avoid b/g aggergation loop) // (note that db_sync_prio will in practice not be set when this is a new feature because it will have same sync_doccount as doc_count) long nCurrTime = System.currentTimeMillis(); //(query from top of the function, basically lookup on gaz_index) BasicDBObject update2 = new BasicDBObject(); update2.put(AssociationFeaturePojo.db_sync_time_, Long.toString(nCurrTime)); update2.put(AssociationFeaturePojo.db_sync_doccount_, eventFeature.getDoccount()); BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2); // (also can be added to below) BasicDBObject update3 = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1); update.put(MongoDbManager.unset_, update3); BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, eventFeature.getIndex()); query.put(AssociationFeaturePojo.communityId_, communityId); // Keep the number of entity1 and entity2 sets down to a reasonable number // (In the end would like to be able to do this based on date rather than (essentially) completely randomly) int nSize; BasicDBObject toPull = null;//from ww w . 
j av a 2 s .c o m if (null != eventFeature.getEntity1()) { if ((nSize = eventFeature.getEntity1().size()) > AssociationFeaturePojo.entity_MAXFIELDS) { if (null == toPull) toPull = new BasicDBObject(); ArrayList<String> ent1ToRemove = new ArrayList<String>( eventFeature.getEntity1().size() - AssociationFeaturePojo.entity_MAXFIELDS); Iterator<String> it = eventFeature.getEntity1().iterator(); while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) { String ent = it.next(); if (-1 == ent.indexOf('/')) { // (ie don't remove the index) nSize--; it.remove(); // (this removes from the index) ent1ToRemove.add(ent); } } toPull.put(AssociationFeaturePojo.entity1_, ent1ToRemove); // (this removes from the database) } } if (null != eventFeature.getEntity2()) { if ((nSize = eventFeature.getEntity2().size()) > AssociationFeaturePojo.entity_MAXFIELDS) { if (null == toPull) toPull = new BasicDBObject(); ArrayList<String> ent2ToRemove = new ArrayList<String>( eventFeature.getEntity2().size() - AssociationFeaturePojo.entity_MAXFIELDS); Iterator<String> it = eventFeature.getEntity2().iterator(); while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) { String ent = it.next(); if (-1 == ent.indexOf('/')) { // (ie don't remove the index) nSize--; it.remove(); // (this removes from the index) ent2ToRemove.add(ent); } } toPull.put(AssociationFeaturePojo.entity2_, ent2ToRemove); // (this removes from the database) } } if (null != toPull) { update.put(MongoDbManager.pullAll_, toPull); // (this removes from the database) } //TESTED (2.1.4.3b, including no index removal clause) if (_diagnosticMode) { if ((null != eventFeature.getDb_sync_time()) || (null != eventFeature.getDb_sync_prio())) { if (_logInDiagnosticMode) System.out.println("AssociationAggregationUtils.synchronizeEventFeature, featureDB: " + query.toString() + " / " + update.toString()); } else { if (_logInDiagnosticMode) System.out.println( "(WOULD NOT RUN) 
EventAggregationUtils.synchronizeEventFeature, featureDB: " + query.toString() + " / " + update.toString()); } } else { eventFeatureDb.update(query, update, false, true); } } if (_diagnosticMode) { if (_logInDiagnosticMode) System.out.println("AssociationAggregationUtils.synchronizeEventFeature, synchronize: " + new StringBuffer(eventFeature.getIndex()).append(':').append(communityId).toString() + " = " + IndexManager.mapToIndex(eventFeature, new AssociationFeaturePojoIndexMap())); } else { ElasticSearchManager esm = IndexManager.getIndex(AssociationFeaturePojoIndexMap.indexName_); esm.addDocument(eventFeature, new AssociationFeaturePojoIndexMap(), null, true); } }
From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java
License:Open Source License
/**
 * Computes a synchronization priority for an association feature, stores it on
 * the in-memory object and writes it to the feature database.
 *
 * The priority is {@code 100 * doccount / (0.01 + db_sync_doccount)}; the 0.01
 * term keeps the division defined when the synced doc count is zero.
 * In diagnostic mode the database is not touched; the operation is optionally printed.
 *
 * @param assocFeature feature to mark; its db_sync_prio is set
 * @param communityId  community used (with the feature index) to select the record
 */
public static void markAssociationFeatureForSync(AssociationFeaturePojo assocFeature, ObjectId communityId) {
    DBCollection featureCollection = DbManager.getFeature().getAssociation();

    final double currentDocCount = assocFeature.getDoccount();
    final double syncedDocCount = assocFeature.getDb_sync_doccount();
    final double priority = (100.0 * currentDocCount) / (0.01 + syncedDocCount);
    assocFeature.setDb_sync_prio(priority);

    BasicDBObject selector = new BasicDBObject(AssociationFeaturePojo.index_, assocFeature.getIndex());
    selector.put(AssociationFeaturePojo.communityId_, communityId);
    BasicDBObject prioField = new BasicDBObject(AssociationFeaturePojo.db_sync_prio_, priority);
    BasicDBObject setPrio = new BasicDBObject(MongoDbManager.set_, prioField);

    if (!_diagnosticMode) {
        featureCollection.update(selector, setPrio, false, true);
    } else if (_logInDiagnosticMode) {
        System.out.println("EntityAggregationUtils.markAssociationFeatureForSync, featureDB: "
                + selector.toString() + " / " + setPrio.toString());
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationBackgroundAggregationManager.java
License:Open Source License
private boolean getToken() { boolean bHaveControl = false; final String id_ = "_id"; final String hostname_ = "hostname"; final String oneUp_ = "1up"; String savedHostname = ""; String savedOneUp = ""; while (!_bKillMe) { // Get IP address: BasicDBObject query = new BasicDBObject(id_, _assocLockId); BasicDBObject lockObj = (BasicDBObject) MongoDbManager.getFeature().getAggregationLock().findOne(query); if (null == lockObj) { lockObj = new BasicDBObject(id_, _assocLockId); lockObj.put(hostname_, getHostname()); lockObj.put(oneUp_, Long.toString(1000000L * (new Date().getTime() % 10000))); // (ie a randomish start number) MongoDbManager.getFeature().getAggregationLock().insert(lockObj); // (will fail if another harvester gets there first) logger.debug("Creating a new aggregation lock object: " + lockObj.toString()); lockObj = (BasicDBObject) MongoDbManager.getFeature().getAggregationLock().findOne(); } //TESTED // So by here lockObj is always non-null // Do I have control? String hostname = lockObj.getString(hostname_); String oneUp = lockObj.getString(oneUp_); bHaveControl = getHostname().equals(hostname); if (!bHaveControl) { // Don't currently have control if (savedHostname.equals(hostname) && savedOneUp.equals(oneUp)) { // Now I have control... logger.debug("I am taking control from: " + hostname + ", " + oneUp); bHaveControl = true;/*from w w w .j a va 2s . 
c om*/ _nGrabbedControl++; } else if (getHostname().equals(savedHostname)) { // I had control of this last time I checked logger.debug("Lost control to: " + hostname); _nLostControl++; } } else { logger.debug("I have control already: " + hostname); } //TESTED if (bHaveControl) { savedHostname = hostname; long nOneUp = Long.parseLong(oneUp); lockObj.put(hostname_, getHostname()); lockObj.put(oneUp_, Long.toString(nOneUp + 1)); MongoDbManager.getFeature().getAggregationLock().save(lockObj); return true; } //TESTED else { // Save info and sleep for 60s savedHostname = hostname; savedOneUp = oneUp; logger.debug("Saving state and sleeping: " + savedHostname + ", " + savedOneUp); for (int i = 0; (i < 6) && !_bKillMe; ++i) { try { Thread.sleep(10000); } catch (InterruptedException e) { } } } //TESTED } // end loop forever return bHaveControl; }
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
/**
 * Updates the feature entries for the list of entities
 * that was just extracted including changing frequency,
 * adding aliases etc
 *
 * This method now has 3 steps:
 * 1. Try to update alias
 * 1.a If fail, create new gaz
 * 2. Update totalfreq and doccount
 *
 * Entities without a community id are skipped. Entities whose processing throws
 * are removed from the supplied map and the error is logged. In diagnostic mode
 * nothing is written to the database (only findOne is issued) and the would-be
 * operations are optionally printed.
 *
 * @param entFeatures entities to update in the entity feature collection; only the
 *        values of the outer map are read. The inner map key is used as the
 *        community id for the cache lookup.
 */
public static void updateEntityFeatures(Map<String, Map<ObjectId, EntityFeaturePojo>> entFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewEntities = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getEntity();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    // Sync timestamp shared by every entity created during this call (lazily initialised)
    String savedSyncTime = null;
    for (Map<ObjectId, EntityFeaturePojo> entCommunity : entFeatures.values()) {

        // Explicit iterator so that failing entries can be removed from the map in the catch block
        Iterator<Map.Entry<ObjectId, EntityFeaturePojo>> it = entCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, EntityFeaturePojo> entFeatureKV = it.next();
            try {
                EntityFeaturePojo entFeature = entFeatureKV.getValue();

                // Saved now because entFeature's counts are overwritten further down
                long nSavedDocCount = entFeature.getDoccount();
                long nSavedFreqCount = entFeature.getTotalfreq();
                // (these should be constant across all communities but keep it here
                //  so can assign it using entFeature, it's v cheap so no need to get once like for sync vars)

                // For each community, see if the entity feature already exists *for that community*
                ObjectId communityID = entFeature.getCommunityId();
                if (null != communityID) {
                    // Only (re)open the cache when the community changes
                    if ((null == currCache) || !currCache.getCommunityId().equals(entFeatureKV.getKey())) {
                        currCache = CommunityFeatureCaches.getCommunityFeatureCache(entFeatureKV.getKey());
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, Opened cache for community: "
                                                + entFeatureKV.getKey());
                        }
                    } //TESTED (by hand)

                    // Is this in our cache? If so can short cut a bunch of the DB interaction:
                    EntityFeaturePojo cachedEnt = currCache.getCachedEntityFeature(entFeature);
                    if (null != cachedEnt) {
                        if (_incrementalMode) {
                            if (_diagnosticMode) {
                                if (_logInDiagnosticMode)
                                    System.out.println(
                                            "EntityAggregationUtils.updateEntityFeatures, skip cached: "
                                                    + cachedEnt.toDb());
                                //TODO (INF-2825): should be continue-ing here (after implementing incremental caching fully) so can use delta more efficiently...
                            }
                        } else if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out
                                        .println("EntityAggregationUtils.updateEntityFeatures, grabbed cached: "
                                                + cachedEnt.toDb());
                        }
                        numCacheHits++;
                    } //TESTED (by hand)
                    else {
                        numCacheMisses++;
                    }

                    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entFeature.getIndex());
                    query.put(EntityFeaturePojo.communityId_, communityID);
                    BasicDBObject updateOp = new BasicDBObject();
                    // Add aliases:
                    BasicDBObject updateOpA = new BasicDBObject();
                    if (null != entFeature.getAlias()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getAlias())
                                || !cachedEnt.getAlias().containsAll(entFeature.getAlias())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, entFeature.getAlias());
                            updateOpA.put(EntityFeaturePojo.alias_, multiopE);
                        } //TESTED (by hand)
                    }
                    // Add link data, if there is any:
                    if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getSemanticLinks())
                                || !cachedEnt.getSemanticLinks().containsAll(entFeature.getSemanticLinks())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopF = new BasicDBObject(MongoDbManager.each_,
                                    entFeature.getSemanticLinks());
                            updateOpA.put(EntityFeaturePojo.linkdata_, multiopF);
                        } //TESTED (by hand)
                    }
                    // OK - now we can copy across the fields into the cache:
                    // (must happen after the containsAll checks above, which compare against the old cached values)
                    if (null != cachedEnt) {
                        currCache.updateCachedEntityFeatureStatistics(cachedEnt, entFeature); //(entFeature is now fully up to date)
                    } //TESTED (by hand)

                    if (!updateOpA.isEmpty()) {
                        updateOp.put(MongoDbManager.addToSet_, updateOpA);
                    }
                    // Update frequency:
                    BasicDBObject updateOpB = new BasicDBObject();
                    updateOpB.put(EntityFeaturePojo.totalfreq_, nSavedFreqCount);
                    updateOpB.put(EntityFeaturePojo.doccount_, nSavedDocCount);
                    updateOp.put(MongoDbManager.inc_, updateOpB);

                    //try to use find/modify to see if something comes back and set doc freq/totalfreq
                    BasicDBObject fields = new BasicDBObject(EntityFeaturePojo.totalfreq_, 1);
                    fields.put(EntityFeaturePojo.doccount_, 1);
                    fields.put(EntityFeaturePojo.alias_, 1);
                    fields.put(EntityFeaturePojo.linkdata_, 1);
                    //(slightly annoying, since only want these 2 largish fields if updating freq but won't know
                    // until after i've got this object)
                    fields.put(EntityFeaturePojo.db_sync_time_, 1);
                    fields.put(EntityFeaturePojo.db_sync_doccount_, 1);

                    DBObject dboUpdate = null;
                    if (_diagnosticMode) {
                        // Read-only in diagnostic mode
                        if (null == cachedEnt) {
                            dboUpdate = col.findOne(query, fields);
                        }
                    } else {
                        if (null != cachedEnt) {
                            col.update(query, updateOp, false, false);
                        } else { // Not cached - so have to grab the feature we're either getting or creating
                            dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                    false, true);
                            // (can use findAndModify because specify index, ie the shard key)
                            // (returns entity before the changes above, update the feature object below)
                            // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                        }
                    }
                    if ((null != cachedEnt) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                    {
                        EntityFeaturePojo gp = cachedEnt;

                        // (Update the entity feature to be correct so that it can be accurately synchronized with the index)
                        if (null == gp) {
                            // Not cached: merge the pre-update database state into the in-memory feature
                            gp = EntityFeaturePojo.fromDb(dboUpdate, EntityFeaturePojo.class);
                            entFeature.setTotalfreq(gp.getTotalfreq() + nSavedFreqCount);
                            entFeature.setDoccount(gp.getDoccount() + nSavedDocCount);
                            entFeature.setDbSyncDoccount(gp.getDbSyncDoccount());
                            entFeature.setDbSyncTime(gp.getDbSyncTime());
                            if (null != gp.getAlias()) {
                                entFeature.addAllAlias(gp.getAlias());
                            }
                            if (null != gp.getSemanticLinks()) {
                                entFeature.addToSemanticLinks(gp.getSemanticLinks());
                            }
                        } //TESTED (cached case and non-cached case)
                        // (in the cached case, entFeature has already been updated by updateCachedEntityFeatureStatistics)

                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, found: "
                                        + ((BasicDBObject) gp.toDb()).toString());
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, ^^^ found from query: "
                                                + query.toString() + " / " + updateOp.toString());
                        }
                        // (In background aggregation mode we update db_sync_prio when checking the doc update schedule)
                    } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                    {
                        numNewEntities++;

                        // Synchronization settings for the newly created object
                        if (null == savedSyncTime) {
                            savedSyncTime = Long.toString(System.currentTimeMillis());
                        }
                        entFeature.setDbSyncDoccount(nSavedDocCount);
                        entFeature.setDbSyncTime(savedSyncTime);

                        // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is
                        // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are
                        // only set here, and the findAndModify is atomic

                        // (Do in raw MongoDB for performance)
                        BasicDBObject baseFields = new BasicDBObject();
                        baseFields.put(EntityFeaturePojo.dimension_, entFeature.getDimension().toString());
                        baseFields.put(EntityFeaturePojo.type_, entFeature.getType());
                        baseFields.put(EntityFeaturePojo.disambiguated_name_, entFeature.getDisambiguatedName());
                        baseFields.put(EntityFeaturePojo.db_sync_doccount_, entFeature.getDbSyncDoccount());
                        baseFields.put(EntityFeaturePojo.db_sync_prio_, 1000.0);
                        baseFields.put(EntityFeaturePojo.db_sync_time_, entFeature.getDbSyncTime());
                        if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                            baseFields.put(EntityFeaturePojo.linkdata_, entFeature.getSemanticLinks());
                        }

                        //attempt to add geotag (makes necessary checks on util side)
                        //also add ontology type if geotag is found
                        EntityGeotagAggregationUtils.addEntityGeo(entFeature);
                        if (entFeature.getGeotag() != null) {
                            BasicDBObject geo = new BasicDBObject(GeoPojo.lat_, entFeature.getGeotag().lat);
                            geo.put(GeoPojo.lon_, entFeature.getGeotag().lon);
                            baseFields.put(EntityFeaturePojo.geotag_, geo);

                            if (entFeature.getOntology_type() != null) {
                                baseFields.put(EntityFeaturePojo.ontology_type_, entFeature.getOntology_type());
                            }
                        }

                        if (!_diagnosticMode) {
                            // Store the object
                            col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                        } else {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, not found: "
                                        + query.toString() + ": " + baseFields.toString());
                        }

                    } //(end first time this feature seen - globally)

                    if (null == cachedEnt) { // First time we've seen this locally, so add to cache
                        currCache.addCachedEntityFeature(entFeature);
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out
                                        .println("EntityAggregationUtils.updateEntityFeatures, added to cache: "
                                                + entFeature.toDb());
                        }
                    } //TESTED (by hand)

                } //(end if community id assigned)
            } catch (Exception e) {
                // Exception, remove from feature list
                // (this mutates the map passed in by the caller)
                it.remove();

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }

        } // (end loop over communities)
    } // (end loop over indexes)

    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some ents were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getEntityCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" ent_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_ents=").append(entFeatures.size()).append(" new_ents=").append(numNewEntities)
                .append(" cache_misses=").append(numCacheMisses).append(" cache_hits=").append(numCacheHits)
                .append(" cache_size=").append(cacheSize);

        logger.info(logMsg.toString());
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
public static void updateMatchingEntities(EntityFeaturePojo entFeature, ObjectId communityId) { String index = entFeature.getIndex(); long totalFreq = entFeature.getTotalfreq(); long docCount = entFeature.getDoccount(); try {/*from ww w . j av a 2 s. co m*/ DBCollection docDb = DbManager.getDocument().getMetadata(); BasicDBObject query1 = new BasicDBObject(); query1.put(EntityPojo.docQuery_index_, index); query1.put(DocumentPojo.communityId_, communityId); BasicDBObject multiopB = new BasicDBObject(); multiopB.put(EntityPojo.docUpdate_totalfrequency_, totalFreq); multiopB.put(EntityPojo.docUpdate_doccount_, docCount); BasicDBObject multiopA = new BasicDBObject(MongoDbManager.set_, multiopB); if (_diagnosticMode) { if (_logInDiagnosticMode) System.out.println("EntityAggregationUtils.updateMatchingEntities: " + query1.toString() + " / " + multiopA.toString()); } else { synchronized (GenericProcessingController.class) { // Because this op can be slow, and traverse a lot of disk, need to ensure that // we don't allow all the threads to hammer it at once (the updates all yield to each other // enough that the disk goes totally crazy) docDb.update(query1, multiopA, false, true); DbManager.getDocument().getLastError(DbManager.getDocument().getMetadata().getName()); // (enforce consecutive accesses for this potentially very slow operation) } // Was originally checked updatedExisting but for INF-1406, it sometimes seemed to be // checking the wrong command. I suspect the reason we had this code in here has gone away, // and it doesn't matter if this update occasionally fails anyway, it will just be out of date // so the check/retry has been removed. } } catch (Exception ex) { logger.error(ex.getMessage(), ex); } }
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
public static void synchronizeEntityFeature(EntityFeaturePojo entityFeature, ObjectId communityId) { DBCollection entityFeatureDb = DbManager.getFeature().getEntity(); // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" the entity if (_diagnosticMode || (null != entityFeature.getDbSyncTime()) || (null != entityFeature.getDb_sync_prio())) { // Else this is a new feature so don't need to update the feature DB, only the index (if db_sync_prio null then have to update to avoid b/g aggergation loop) // (note that db_sync_prio will in practice not be set when this is a new feature because it will have same sync_doccount as doc_count) long nCurrTime = System.currentTimeMillis(); //(query from top of the function, basically lookup on gaz_index) BasicDBObject update2 = new BasicDBObject(); update2.put(EntityFeaturePojo.db_sync_time_, Long.toString(nCurrTime)); update2.put(EntityFeaturePojo.db_sync_doccount_, entityFeature.getDoccount()); BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2); BasicDBObject update3 = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1); update.put(MongoDbManager.unset_, update3); BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex()); query.put(EntityFeaturePojo.communityId_, communityId); if (_diagnosticMode) { if ((null != entityFeature.getDbSyncTime()) || (null != entityFeature.getDb_sync_prio())) { if (_logInDiagnosticMode) System.out.println("EntityAggregationUtils.synchronizeEntityFeature, featureDB: " + query.toString() + " / " + update.toString()); } else { if (_logInDiagnosticMode) System.out.println( "(WOULD NOT RUN) EntityAggregationUtils.synchronizeEntityFeature, featureDB: " + query.toString() + " / " + update.toString()); }/*from w ww. 
j a v a 2 s.com*/ } else { entityFeatureDb.update(query, update, false, true); } } if (_diagnosticMode) { if (_logInDiagnosticMode) System.out.println("EntityAggregationUtils.synchronizeEntityFeature, synchronize: " + new StringBuffer(entityFeature.getIndex()).append(':').append(communityId).toString() + " = " + IndexManager.mapToIndex(entityFeature, new EntityFeaturePojoIndexMap())); } else { ElasticSearchManager esm = IndexManager.getIndex(EntityFeaturePojoIndexMap.indexName_); esm.addDocument(entityFeature, new EntityFeaturePojoIndexMap(), null, true); //(_id is set by the index map to index:communityId) } }
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
/**
 * Computes a synchronization priority for an entity feature, stores it on the feature
 * object and sets it on the matching record of the feature database.
 *
 * <p>The priority is {@code 100 * doccount / (0.01 + dbSyncDoccount)}; the 0.01 term
 * keeps the divisor non-zero when the synced document count is 0. In diagnostic mode the
 * database is not written; the query and update are printed instead if diagnostic logging
 * is enabled.
 *
 * @param entityFeature the feature to mark; its sync priority is overwritten
 * @param communityId   the community that, together with the feature index, selects the
 *                      database record
 */
public static void markEntityFeatureForSync(EntityFeaturePojo entityFeature, ObjectId communityId) {
    final DBCollection featureCollection = DbManager.getFeature().getEntity();

    // Same evaluation order as a single expression: (100 * doccount) / (0.01 + synced doccount)
    final double scaledDocCount = 100.0 * (double) entityFeature.getDoccount();
    final double syncedDocCount = 0.01 + (double) entityFeature.getDbSyncDoccount();
    final double priority = scaledDocCount / syncedDocCount;
    entityFeature.setDb_sync_prio(priority);

    final BasicDBObject selector = new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex());
    selector.put(EntityFeaturePojo.communityId_, communityId);

    final BasicDBObject priorityField = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, priority);
    final BasicDBObject changes = new BasicDBObject(MongoDbManager.set_, priorityField);

    if (!_diagnosticMode) {
        featureCollection.update(selector, changes, false, true);
    } else if (_logInDiagnosticMode) {
        System.out.println("EntityAggregationUtils.markEntityFeatureForSynchronization, featureDB: "
                + selector.toString() + " / " + changes.toString());
    }
}