Example usage for com.mongodb BasicDBObject toString

List of usage examples for com.mongodb BasicDBObject toString

Introduction

On this page you can find example usages of com.mongodb BasicDBObject.toString().

Prototype

@SuppressWarnings("deprecation")
public String toString() 

Document

Returns a JSON serialization of this object.

The output will look like: {"a":1, "b":["x","y","z"]}
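
A minimal standalone sketch (not taken from the examples below) that builds the documented structure and prints it; the exact whitespace of the rendered JSON may vary between driver versions:

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class BasicDBObjectToStringExample {
    public static void main(String[] args) {
        // Build {"a":1, "b":["x","y","z"]}
        BasicDBList list = new BasicDBList();
        list.add("x");
        list.add("y");
        list.add("z");
        BasicDBObject dbo = new BasicDBObject("a", 1).append("b", list);

        // toString() delegates to the driver's JSON serializer
        System.out.println(dbo.toString());
        // Prints something like: { "a" : 1 , "b" : [ "x" , "y" , "z"]}
    }
}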

Usage

From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteHadoopUtils.java

License:Open Source License

public static void authenticateShareList(CustomMapReduceJobPojo cmr, String[] path) {

    ArrayList<ObjectId> list = new ArrayList<ObjectId>(path.length);
    for (String s : path) {
        list.add(new ObjectId(s));
    }
    BasicDBObject query = new BasicDBObject(SharePojo._id_, new BasicDBObject(DbManager.in_, list));
    query.put(ShareCommunityPojo.shareQuery_id_,
            new BasicDBObject(DbManager.ne_, new BasicDBObject(DbManager.in_, list)));
    BasicDBObject fields = new BasicDBObject(ShareCommunityPojo.shareQuery_id_, 1);
    DBCursor dbc = DbManager.getSocial().getShare().find(query, fields);
    StringBuffer sb = new StringBuffer();
    for (Object o : dbc) {
        BasicDBObject dbo = (BasicDBObject) o;
        // Build a comma-separated list of the offending shares
        if (0 != sb.length()) {
            sb.append(", ");
        }
        sb.append(dbo.toString());
    }
    if (sb.length() > 0) {
        throw new RuntimeException("Share authentication error: " + sb.toString());
    }
}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AggregationManager.java

License:Open Source License

public static void updateDocEntitiesFromDeletedDocuments(String uuid) {
    String outCollection = new StringBuilder(uuid).append("_AggregationUtils").toString();
    try {
        PropertiesManager props = new PropertiesManager();
        if (props.getAggregationDisabled()) { // (no need to do this)
            return;
        }
        DBCollection outColl = DbManager.getDB("doc_metadata").getCollection(outCollection);

        DBCursor dbc = outColl.find();
        for (DBObject dbo : dbc) {
            BasicDBObject entityEl = (BasicDBObject) dbo;
            BasicDBObject entityVal = (BasicDBObject) entityEl.get("value");

            long nDocDecrement = entityVal.getLong("dc");
            long nFreqDecrement = entityVal.getLong("f");
            long nCurrFreq = entityVal.getLong("tf");
            long nCurrDocCount = entityVal.getLong("tdc");

            // (These are by construction the lowest values so this will provide some defence against going -ve)
            if (nDocDecrement > nCurrDocCount) {
                nDocDecrement = nCurrDocCount;
            }
            if (nFreqDecrement > nCurrFreq) {
                nFreqDecrement = nCurrFreq;
            }

            BasicDBObject entityId = (BasicDBObject) entityEl.get("_id");
            ObjectId commId = null;
            Object commObj = entityId.get("comm");
            if (commObj instanceof ObjectId) {
                commId = entityId.getObjectId("comm");
            }
            String index = (String) entityId.get("index");
            if ((null == index) || (null == commId)) {
                continue; // random error
            }

            BasicDBObject updateQuery = new BasicDBObject(EntityFeaturePojo.index_, index);
            updateQuery.put(EntityFeaturePojo.communityId_, commId);
            BasicDBObject entityUpdate1 = new BasicDBObject(EntityFeaturePojo.doccount_, -nDocDecrement);
            entityUpdate1.put(EntityFeaturePojo.totalfreq_, -nFreqDecrement);
            BasicDBObject entityUpdate = new BasicDBObject(DbManager.inc_, entityUpdate1);

            if (_diagnosticMode) {
                if (_logInDiagnosticMode)
                    System.out.println("UPDATE FEATURE DATABASE: " + updateQuery.toString() + "/"
                            + entityUpdate.toString());
            } else {
                DbManager.getFeature().getEntity().update(updateQuery, entityUpdate);
                // (can be a single query because the query is on index, the shard)
            }
            //TESTED

            if ((nDocDecrement < nCurrDocCount) && (nDocDecrement * 10 > nCurrDocCount)) {
                // ie there are some documents left
                // and the doc count has shifted by more than 10%
                BasicDBObject updateQuery2 = new BasicDBObject(EntityPojo.docQuery_index_, index);
                updateQuery2.put(DocumentPojo.communityId_, commId);
                BasicDBObject entityUpdate2_1 = new BasicDBObject(EntityPojo.docUpdate_doccount_,
                        nCurrDocCount - nDocDecrement);
                entityUpdate2_1.put(EntityPojo.docUpdate_totalfrequency_, nCurrFreq - nFreqDecrement);
                BasicDBObject entityUpdate2 = new BasicDBObject(DbManager.set_, entityUpdate2_1);

                if (_diagnosticMode) {
                    if (_logInDiagnosticMode)
                        System.out.println("UPDATE DOC DATABASE: " + updateQuery2.toString() + "/"
                                + entityUpdate2.toString());
                } else {
                    DbManager.getDocument().getMetadata().update(updateQuery2, entityUpdate2, false, true);
                }
            }
        } //TESTED (including when to update logic above)
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Tidy up
    DbManager.getDB("doc_metadata").getCollection(outCollection).drop();
}
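
In diagnostic mode the method above only prints the query/update pair via toString() instead of applying it. The following sketch approximates that output; the literal field names ("index", "communityId", "doccount", "totalfreq") and the sample ObjectId are assumptions standing in for the pojo constants, and the rendered JSON spacing depends on the driver version:

import com.mongodb.BasicDBObject;
import org.bson.types.ObjectId;

public class DiagnosticPrintSketch {
    public static void main(String[] args) {
        // Assumed field names, stand-ins for EntityFeaturePojo.index_ etc.
        BasicDBObject updateQuery = new BasicDBObject("index", "barack obama/person");
        updateQuery.put("communityId", new ObjectId("4e3706c48d26852237078005"));

        // Negative $inc values decrement the doc count and total frequency counters
        BasicDBObject decrements = new BasicDBObject("doccount", -3L);
        decrements.put("totalfreq", -7L);
        BasicDBObject entityUpdate = new BasicDBObject("$inc", decrements);

        // Mirrors the "UPDATE FEATURE DATABASE: " + query + "/" + update pattern above
        System.out.println("UPDATE FEATURE DATABASE: " + updateQuery.toString() + "/" + entityUpdate.toString());
        // Prints something like:
        // UPDATE FEATURE DATABASE: { "index" : "barack obama/person" , "communityId" : { "$oid" : "4e3706c48d26852237078005"}}/{ "$inc" : { "doccount" : -3 , "totalfreq" : -7}}
    }
}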

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java

License:Open Source License

/**
 * Add events to the elastic search index for events
 * and the mongodb collection
 * so they are searchable for searchsuggest
 * 
 * Step 1.a, try to just update aliases
 * Step 1.b, if fail, create new entry
 * 
 * Step 2, Update totalfreq and doccount
 * 
 * Step 3, After updating totalfreq and doccount, write to ES for every group
 * 
 * @param events
 */
public static void updateEventFeatures(Map<String, Map<ObjectId, AssociationFeaturePojo>> eventFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewAssocs = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getAssociation();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    String savedSyncTime = null;
    for (Map<ObjectId, AssociationFeaturePojo> evtCommunity : eventFeatures.values()) {

        Iterator<Map.Entry<ObjectId, AssociationFeaturePojo>> it = evtCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, AssociationFeaturePojo> evtFeatureKV = it.next();
            try {
                AssociationFeaturePojo evtFeature = evtFeatureKV.getValue();
                long nSavedDocCount = evtFeature.getDoccount();

                ObjectId communityID = evtFeature.getCommunityId();

                if ((null == currCache) || !currCache.getCommunityId().equals(evtFeatureKV.getKey())) {
                    currCache = CommunityFeatureCaches.getCommunityFeatureCache(evtFeatureKV.getKey());
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println(
                                    "AssociationAggregationUtils.updateEventFeatures, Opened cache for community: "
                                            + evtFeatureKV.getKey());
                    }
                } //TESTED (by hand)               

                // Is this in our cache? If so can short cut a bunch of the DB interaction:
                AssociationFeaturePojo cachedAssoc = currCache.getCachedAssocFeature(evtFeature);
                if (null != cachedAssoc) {
                    if (_incrementalMode) {
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "AssociationAggregationUtils.updateEventFeatures, skip cached: "
                                                + cachedAssoc.toDb());
                            //TODO (INF-2825): should be continue-ing here so can use delta more efficiently...
                        }
                    } else if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out
                                    .println("AssociationAggregationUtils.updateEventFeatures, grabbed cached: "
                                            + cachedAssoc.toDb());
                    }
                    numCacheHits++;
                } //TESTED (by hand)         
                else {
                    numCacheMisses++;
                }

                //try to update
                BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, evtFeature.getIndex());
                query.put(AssociationFeaturePojo.communityId_, communityID);

                //Step1 try to update alias
                //update arrays
                BasicDBObject multiopAliasArrays = new BasicDBObject();
                // Entity1 Alias:
                if (null != evtFeature.getEntity1_index()) {
                    evtFeature.addEntity1(evtFeature.getEntity1_index());
                }
                if (null != evtFeature.getEntity1()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity1())
                            || !cachedAssoc.getEntity1().containsAll(evtFeature.getEntity1())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                                evtFeature.getEntity1());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity1_, multiopE);
                    }
                } //TESTED (by hand)

                // Entity2 Alias:
                if (null != evtFeature.getEntity2_index()) {
                    evtFeature.addEntity2(evtFeature.getEntity2_index());
                }
                if (null != evtFeature.getEntity2()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity2())
                            || !cachedAssoc.getEntity2().containsAll(evtFeature.getEntity2())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                                evtFeature.getEntity2());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity2_, multiopE);
                    }
                } //TESTED (by hand)

                // verb/verb cat alias:
                if (null != evtFeature.getVerb_category()) {
                    evtFeature.addVerb(evtFeature.getVerb_category());
                }
                if (null != evtFeature.getVerb()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getVerb())
                            || !cachedAssoc.getVerb().containsAll(evtFeature.getVerb())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getVerb());
                        multiopAliasArrays.put(AssociationFeaturePojo.verb_, multiopE);
                    }
                } //TESTED (by hand)

                // OK - now we can copy across the fields into the cache:
                if (null != cachedAssoc) {
                    currCache.updateCachedAssocFeatureStatistics(cachedAssoc, evtFeature); //(evtFeature is now fully up to date)
                } //TESTED (by hand)

                BasicDBObject updateOp = new BasicDBObject();
                if (!multiopAliasArrays.isEmpty()) {
                    updateOp.put(MongoDbManager.addToSet_, multiopAliasArrays);
                }
                // Document count for this event
                BasicDBObject updateFreqDocCount = new BasicDBObject(AssociationFeaturePojo.doccount_,
                        nSavedDocCount);
                updateOp.put(MongoDbManager.inc_, updateFreqDocCount);

                BasicDBObject fields = new BasicDBObject(AssociationFeaturePojo.doccount_, 1);
                fields.put(AssociationFeaturePojo.entity1_, 1);
                fields.put(AssociationFeaturePojo.entity2_, 1);
                fields.put(AssociationFeaturePojo.verb_, 1);
                //(slightly annoying, since only want these if updating dc but won't know
                // until after i've got this object)

                fields.put(AssociationFeaturePojo.db_sync_time_, 1);
                fields.put(AssociationFeaturePojo.db_sync_doccount_, 1);

                DBObject dboUpdate = null;
                if (_diagnosticMode) {
                    if (null == cachedAssoc) {
                        dboUpdate = col.findOne(query, fields);
                    }
                } else {
                    if (null != cachedAssoc) {
                        col.update(query, updateOp, false, false);
                    } else { // Not cached - so have to grab the feature we're either getting or creating
                        dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                false, true);
                        // (can use findAndModify because specify index, ie the shard key)
                        // (returns event before the changes above, update the feature object below)
                        // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                    }
                }
                if ((null != cachedAssoc) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                {
                    AssociationFeaturePojo egp = cachedAssoc;

                    if (null == egp) {
                        egp = AssociationFeaturePojo.fromDb(dboUpdate, AssociationFeaturePojo.class);
                        evtFeature.setDoccount(egp.getDoccount() + nSavedDocCount);
                        evtFeature.setDb_sync_doccount(egp.getDb_sync_doccount());
                        evtFeature.setDb_sync_time(egp.getDb_sync_time());
                        if (null != egp.getEntity1()) {
                            for (String ent : egp.getEntity1())
                                evtFeature.addEntity1(ent);
                        }
                        if (null != egp.getEntity2()) {
                            for (String ent : egp.getEntity2())
                                evtFeature.addEntity2(ent);
                        }
                        if (null != egp.getVerb()) {
                            for (String verb : egp.getVerb())
                                evtFeature.addVerb(verb);
                        }
                    } //TESTED (cached and non-cached cases)
                      // (in the cached case, evtFeature has already been updated by updateCachedAssocFeatureStatistics)

                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, found: "
                                    + ((BasicDBObject) egp.toDb()).toString());
                        if (_logInDiagnosticMode)
                            System.out.println(
                                    "AssociationAggregationUtils.updateEventFeatures, ^^^ found from query: "
                                            + query.toString() + " / " + updateOp.toString());
                    }
                    // (In background aggregation mode we update db_sync_prio when checking the -otherwise unused, unlike entities- document update schedule) 
                } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                {
                    numNewAssocs++;

                    // Synchronization settings for the newly created object
                    evtFeature.setDb_sync_doccount(nSavedDocCount);
                    if (null == savedSyncTime) {
                        savedSyncTime = Long.toString(System.currentTimeMillis());
                    }
                    evtFeature.setDb_sync_time(savedSyncTime);

                    // This is all "distributed safe" (apart from the db_sync_xxx and it doesn't matter if that is 
                    // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are 
                    // only set here, and the findAndModify is atomic

                    BasicDBObject baseFields = new BasicDBObject();
                    if (null != evtFeature.getEntity1_index()) {
                        baseFields.put(AssociationFeaturePojo.entity1_index_, evtFeature.getEntity1_index());
                    }
                    if (null != evtFeature.getEntity2_index()) {
                        baseFields.put(AssociationFeaturePojo.entity2_index_, evtFeature.getEntity2_index());
                    }
                    if (null != evtFeature.getVerb_category()) {
                        baseFields.put(AssociationFeaturePojo.verb_category_, evtFeature.getVerb_category());
                    }
                    baseFields.put(AssociationFeaturePojo.assoc_type_, evtFeature.getAssociation_type());
                    baseFields.put(AssociationFeaturePojo.db_sync_doccount_, evtFeature.getDb_sync_doccount());
                    baseFields.put(AssociationFeaturePojo.db_sync_time_, evtFeature.getDb_sync_time());
                    baseFields.put(AssociationFeaturePojo.db_sync_prio_, 1000.0); // (ensures new objects are quickly index-synchronized)

                    if (!_diagnosticMode) {
                        // Store the object
                        col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                    } else {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, not found: "
                                    + query.toString() + " / " + baseFields.toString() + "/ orig_update= "
                                    + updateOp.toString());
                    }

                    // (Note even in background aggregation mode we still perform the feature synchronization
                    //  for new entities - and it has to be right at the end because it "corrupts" the objects)

                } //(end if first time seen)

                if (null == cachedAssoc) { // First time we've seen this locally, so add to cache
                    currCache.addCachedAssocFeature(evtFeature);
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out
                                    .println("AssociationAggregationUtils.updateEventFeatures, added to cache: "
                                            + evtFeature.toDb());
                    }
                } //TESTED (by hand)                           
            } catch (Exception e) {
                // Exception, remove from feature list
                it.remove();

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }

        } // (end loop over all communities for the set of features sharing an index)
    } // (end loop over indexes) 

    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some assocs were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getAssocCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" assoc_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_assocs=").append(eventFeatures.size()).append(" new_assocs=")
                .append(numNewAssocs).append(" cache_misses=").append(numCacheMisses).append(" cache_hits=")
                .append(numCacheHits).append(" cache_size=").append(cacheSize);

        logger.info(logMsg.toString());
    }

}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java

License:Open Source License

public static void synchronizeEventFeature(AssociationFeaturePojo eventFeature, ObjectId communityId) {
    DBCollection eventFeatureDb = DbManager.getFeature().getAssociation();

    // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" the event      

    if (_diagnosticMode || (null != eventFeature.getDb_sync_time())
            || (null != eventFeature.getDb_sync_prio())) {
        // Else this is a new feature so don't need to update the feature DB, only the index (if db_sync_prio null then have to update to avoid b/g aggregation loop)
        // (note that db_sync_prio will in practice not be set when this is a new feature because it will have same sync_doccount as doc_count)

        long nCurrTime = System.currentTimeMillis();
        //(query from top of the function, basically lookup on gaz_index)
        BasicDBObject update2 = new BasicDBObject();
        update2.put(AssociationFeaturePojo.db_sync_time_, Long.toString(nCurrTime));
        update2.put(AssociationFeaturePojo.db_sync_doccount_, eventFeature.getDoccount());
        BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2);
        // (also can be added to below)
        BasicDBObject update3 = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1);
        update.put(MongoDbManager.unset_, update3);
        BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, eventFeature.getIndex());
        query.put(AssociationFeaturePojo.communityId_, communityId);

        // Keep the number of entity1 and entity2 sets down to a reasonable number
        // (In the end would like to be able to do this based on date rather than (essentially) completely randomly)
        int nSize;
        BasicDBObject toPull = null;
        if (null != eventFeature.getEntity1()) {
            if ((nSize = eventFeature.getEntity1().size()) > AssociationFeaturePojo.entity_MAXFIELDS) {
                if (null == toPull)
                    toPull = new BasicDBObject();
                ArrayList<String> ent1ToRemove = new ArrayList<String>(
                        eventFeature.getEntity1().size() - AssociationFeaturePojo.entity_MAXFIELDS);
                Iterator<String> it = eventFeature.getEntity1().iterator();
                while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) {
                    String ent = it.next();
                    if (-1 == ent.indexOf('/')) { // (ie don't remove the index)
                        nSize--;
                        it.remove(); // (this removes from the index)
                        ent1ToRemove.add(ent);
                    }
                }
                toPull.put(AssociationFeaturePojo.entity1_, ent1ToRemove);
                // (this removes from the database)
            }
        }
        if (null != eventFeature.getEntity2()) {
            if ((nSize = eventFeature.getEntity2().size()) > AssociationFeaturePojo.entity_MAXFIELDS) {
                if (null == toPull)
                    toPull = new BasicDBObject();
                ArrayList<String> ent2ToRemove = new ArrayList<String>(
                        eventFeature.getEntity2().size() - AssociationFeaturePojo.entity_MAXFIELDS);
                Iterator<String> it = eventFeature.getEntity2().iterator();
                while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) {
                    String ent = it.next();
                    if (-1 == ent.indexOf('/')) { // (ie don't remove the index)
                        nSize--;
                        it.remove(); // (this removes from the index)
                        ent2ToRemove.add(ent);
                    }
                }
                toPull.put(AssociationFeaturePojo.entity2_, ent2ToRemove);
                // (this removes from the database)
            }
        }
        if (null != toPull) {
            update.put(MongoDbManager.pullAll_, toPull);
            // (this removes from the database)
        }
        //TESTED (2.1.4.3b, including no index removal clause)

        if (_diagnosticMode) {
            if ((null != eventFeature.getDb_sync_time()) || (null != eventFeature.getDb_sync_prio())) {
                if (_logInDiagnosticMode)
                    System.out.println("AssociationAggregationUtils.synchronizeEventFeature, featureDB: "
                            + query.toString() + " / " + update.toString());
            } else {
                if (_logInDiagnosticMode)
                    System.out.println(
                            "(WOULD NOT RUN) EventAggregationUtils.synchronizeEventFeature, featureDB: "
                                    + query.toString() + " / " + update.toString());
            }
        } else {
            eventFeatureDb.update(query, update, false, true);
        }
    }

    if (_diagnosticMode) {
        if (_logInDiagnosticMode)
            System.out.println("AssociationAggregationUtils.synchronizeEventFeature, synchronize: "
                    + new StringBuffer(eventFeature.getIndex()).append(':').append(communityId).toString()
                    + " = " + IndexManager.mapToIndex(eventFeature, new AssociationFeaturePojoIndexMap()));
    } else {
        ElasticSearchManager esm = IndexManager.getIndex(AssociationFeaturePojoIndexMap.indexName_);
        esm.addDocument(eventFeature, new AssociationFeaturePojoIndexMap(), null, true);
    }
}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java

License:Open Source License

public static void markAssociationFeatureForSync(AssociationFeaturePojo assocFeature, ObjectId communityId) {
    DBCollection assocFeatureDb = DbManager.getFeature().getAssociation();
    double dPrio = 100.0 * (double) assocFeature.getDoccount()
            / (0.01 + (double) assocFeature.getDb_sync_doccount());
    assocFeature.setDb_sync_prio(dPrio);
    BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, assocFeature.getIndex());
    query.put(AssociationFeaturePojo.communityId_, communityId);
    BasicDBObject update = new BasicDBObject(MongoDbManager.set_,
            new BasicDBObject(AssociationFeaturePojo.db_sync_prio_, dPrio));
    if (_diagnosticMode) {
        if (_logInDiagnosticMode)
            System.out.println("EntityAggregationUtils.markAssociationFeatureForSync, featureDB: "
                    + query.toString() + " / " + update.toString());
    } else {
        assocFeatureDb.update(query, update, false, true);
    }
}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationBackgroundAggregationManager.java

License:Open Source License

private boolean getToken() {

    boolean bHaveControl = false;

    final String id_ = "_id";
    final String hostname_ = "hostname";
    final String oneUp_ = "1up";

    String savedHostname = "";
    String savedOneUp = "";

    while (!_bKillMe) {
        // Get IP address:
        BasicDBObject query = new BasicDBObject(id_, _assocLockId);
        BasicDBObject lockObj = (BasicDBObject) MongoDbManager.getFeature().getAggregationLock().findOne(query);
        if (null == lockObj) {
            lockObj = new BasicDBObject(id_, _assocLockId);
            lockObj.put(hostname_, getHostname());
            lockObj.put(oneUp_, Long.toString(1000000L * (new Date().getTime() % 10000)));
            // (ie a randomish start number)

            MongoDbManager.getFeature().getAggregationLock().insert(lockObj);
            // (will fail if another harvester gets there first)

            logger.debug("Creating a new aggregation lock object: " + lockObj.toString());

            lockObj = (BasicDBObject) MongoDbManager.getFeature().getAggregationLock().findOne();
        } //TESTED

        // So by here lockObj is always non-null

        // Do I have control?
        String hostname = lockObj.getString(hostname_);
        String oneUp = lockObj.getString(oneUp_);

        bHaveControl = getHostname().equals(hostname);

        if (!bHaveControl) { // Don't currently have control            
            if (savedHostname.equals(hostname) && savedOneUp.equals(oneUp)) { // Now I have control...
                logger.debug("I am taking control from: " + hostname + ", " + oneUp);
                bHaveControl = true;
                _nGrabbedControl++;
            } else if (getHostname().equals(savedHostname)) { // I had control of this last time I checked
                logger.debug("Lost control to: " + hostname);
                _nLostControl++;
            }
        } else {
            logger.debug("I have control already: " + hostname);
        } //TESTED

        if (bHaveControl) {
            savedHostname = hostname;
            long nOneUp = Long.parseLong(oneUp);
            lockObj.put(hostname_, getHostname());
            lockObj.put(oneUp_, Long.toString(nOneUp + 1));
            MongoDbManager.getFeature().getAggregationLock().save(lockObj);
            return true;
        } //TESTED
        else { // Save info and sleep for 60s 
            savedHostname = hostname;
            savedOneUp = oneUp;
            logger.debug("Saving state and sleeping: " + savedHostname + ", " + savedOneUp);

            for (int i = 0; (i < 6) && !_bKillMe; ++i) {
                try {
                    Thread.sleep(10000);
                } catch (InterruptedException e) {
                }
            }
        } //TESTED

    } // end loop forever 

    return bHaveControl;
}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java

License:Open Source License

/**
 * Updates the feature entries for the list of entities
 * that was just extracted including changing frequency,
 * adding aliases etc
 * 
 * This method now has 3 steps:
 * 1. Try to update alias
 *    1.a If fail, create new gaz
 * 2. Update totalfreq and doccount
 * 
 * @param ents List of entities to update in the entity feature
 */
public static void updateEntityFeatures(Map<String, Map<ObjectId, EntityFeaturePojo>> entFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewEntities = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getEntity();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    String savedSyncTime = null;
    for (Map<ObjectId, EntityFeaturePojo> entCommunity : entFeatures.values()) {

        Iterator<Map.Entry<ObjectId, EntityFeaturePojo>> it = entCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, EntityFeaturePojo> entFeatureKV = it.next();
            try {
                EntityFeaturePojo entFeature = entFeatureKV.getValue();

                long nSavedDocCount = entFeature.getDoccount();
                long nSavedFreqCount = entFeature.getTotalfreq();
                // (these should be constant across all communities but keep it here
                //  so can assign it using entFeature, it's v cheap so no need to get once like for sync vars)

                // For each community, see if the entity feature already exists *for that community*               
                ObjectId communityID = entFeature.getCommunityId();
                if (null != communityID) {
                    if ((null == currCache) || !currCache.getCommunityId().equals(entFeatureKV.getKey())) {
                        currCache = CommunityFeatureCaches.getCommunityFeatureCache(entFeatureKV.getKey());
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, Opened cache for community: "
                                                + entFeatureKV.getKey());
                        }
                    } //TESTED (by hand)

                    // Is this in our cache? If so can short cut a bunch of the DB interaction:
                    EntityFeaturePojo cachedEnt = currCache.getCachedEntityFeature(entFeature);
                    if (null != cachedEnt) {
                        if (_incrementalMode) {
                            if (_diagnosticMode) {
                                if (_logInDiagnosticMode)
                                    System.out.println(
                                            "EntityAggregationUtils.updateEntityFeatures, skip cached: "
                                                    + cachedEnt.toDb());
                                //TODO (INF-2825): should be continue-ing here (after implementing incremental caching fully) so can use delta more efficiently...
                            }
                        } else if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out
                                        .println("EntityAggregationUtils.updateEntityFeatures, grabbed cached: "
                                                + cachedEnt.toDb());
                        }
                        numCacheHits++;

                    } //TESTED (by hand)                  
                    else {
                        numCacheMisses++;
                    }

                    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entFeature.getIndex());
                    query.put(EntityFeaturePojo.communityId_, communityID);
                    BasicDBObject updateOp = new BasicDBObject();
                    // Add aliases:
                    BasicDBObject updateOpA = new BasicDBObject();
                    if (null != entFeature.getAlias()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getAlias())
                                || !cachedEnt.getAlias().containsAll(entFeature.getAlias())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                                    entFeature.getAlias());
                            updateOpA.put(EntityFeaturePojo.alias_, multiopE);
                        } //TESTED (by hand)
                    }
                    // Add link data, if there is any:
                    if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getSemanticLinks())
                                || !cachedEnt.getSemanticLinks().containsAll(entFeature.getSemanticLinks())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopF = new BasicDBObject(MongoDbManager.each_,
                                    entFeature.getSemanticLinks());
                            updateOpA.put(EntityFeaturePojo.linkdata_, multiopF);
                        } //TESTED (by hand)
                    }
                    // OK - now we can copy across the fields into the cache:
                    if (null != cachedEnt) {
                        currCache.updateCachedEntityFeatureStatistics(cachedEnt, entFeature); //(entFeature is now fully up to date)
                    } //TESTED (by hand)

                    if (!updateOpA.isEmpty()) {
                        updateOp.put(MongoDbManager.addToSet_, updateOpA);
                    }
                    // Update frequency:
                    BasicDBObject updateOpB = new BasicDBObject();
                    updateOpB.put(EntityFeaturePojo.totalfreq_, nSavedFreqCount);
                    updateOpB.put(EntityFeaturePojo.doccount_, nSavedDocCount);
                    updateOp.put(MongoDbManager.inc_, updateOpB);

                    //try to use find/modify to see if something comes back and set doc freq/totalfreq
                    BasicDBObject fields = new BasicDBObject(EntityFeaturePojo.totalfreq_, 1);
                    fields.put(EntityFeaturePojo.doccount_, 1);
                    fields.put(EntityFeaturePojo.alias_, 1);
                    fields.put(EntityFeaturePojo.linkdata_, 1);
                    //(slightly annoying, since only want these 2 largish fields if updating freq but won't know
                    // until after i've got this object)                  
                    fields.put(EntityFeaturePojo.db_sync_time_, 1);
                    fields.put(EntityFeaturePojo.db_sync_doccount_, 1);

                    DBObject dboUpdate = null;
                    if (_diagnosticMode) {
                        if (null == cachedEnt) {
                            dboUpdate = col.findOne(query, fields);
                        }
                    } else {
                        if (null != cachedEnt) {
                            col.update(query, updateOp, false, false);
                        } else { // Not cached - so have to grab the feature we're either getting or creating
                            dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                    false, true);
                            // (can use findAndModify because specify index, ie the shard key)
                            // (returns entity before the changes above, update the feature object below)
                            // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                        }
                    }
                    if ((null != cachedEnt) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                    {
                        EntityFeaturePojo gp = cachedEnt;

                        // (Update the entity feature to be correct so that it can be accurately synchronized with the index)
                        if (null == gp) {
                            gp = EntityFeaturePojo.fromDb(dboUpdate, EntityFeaturePojo.class);
                            entFeature.setTotalfreq(gp.getTotalfreq() + nSavedFreqCount);
                            entFeature.setDoccount(gp.getDoccount() + nSavedDocCount);
                            entFeature.setDbSyncDoccount(gp.getDbSyncDoccount());
                            entFeature.setDbSyncTime(gp.getDbSyncTime());
                            if (null != gp.getAlias()) {
                                entFeature.addAllAlias(gp.getAlias());
                            }
                            if (null != gp.getSemanticLinks()) {
                                entFeature.addToSemanticLinks(gp.getSemanticLinks());
                            }
                        } //TESTED (cached case and non-cached case)
                          // (in the cached case, entFeature has already been updated by updateCachedEntityFeatureStatistics)

                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, found: "
                                        + ((BasicDBObject) gp.toDb()).toString());
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, ^^^ found from query: "
                                                + query.toString() + " / " + updateOp.toString());
                        }
                        // (In background aggregation mode we update db_sync_prio when checking the doc update schedule) 
                    } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                    {
                        numNewEntities++;

                        // Synchronization settings for the newly created object
                        if (null == savedSyncTime) {
                            savedSyncTime = Long.toString(System.currentTimeMillis());
                        }
                        entFeature.setDbSyncDoccount(nSavedDocCount);
                        entFeature.setDbSyncTime(savedSyncTime);

                        // This is all "distributed safe" (apart from the db_sync_xxx and it doesn't matter if that is 
                        // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are 
                        // only set here, and the findAndModify is atomic

                        // (Do in raw MongoDB for performance)
                        BasicDBObject baseFields = new BasicDBObject();
                        baseFields.put(EntityFeaturePojo.dimension_, entFeature.getDimension().toString());
                        baseFields.put(EntityFeaturePojo.type_, entFeature.getType());
                        baseFields.put(EntityFeaturePojo.disambiguated_name_,
                                entFeature.getDisambiguatedName());
                        baseFields.put(EntityFeaturePojo.db_sync_doccount_, entFeature.getDbSyncDoccount());
                        baseFields.put(EntityFeaturePojo.db_sync_prio_, 1000.0);
                        baseFields.put(EntityFeaturePojo.db_sync_time_, entFeature.getDbSyncTime());
                        if ((null != entFeature.getSemanticLinks())
                                && !entFeature.getSemanticLinks().isEmpty()) {
                            baseFields.put(EntityFeaturePojo.linkdata_, entFeature.getSemanticLinks());
                        }

                        //attempt to add geotag (makes necessary checks on util side)
                        //also add ontology type if geotag is found
                        EntityGeotagAggregationUtils.addEntityGeo(entFeature);
                        if (entFeature.getGeotag() != null) {
                            BasicDBObject geo = new BasicDBObject(GeoPojo.lat_, entFeature.getGeotag().lat);
                            geo.put(GeoPojo.lon_, entFeature.getGeotag().lon);
                            baseFields.put(EntityFeaturePojo.geotag_, geo);

                            if (entFeature.getOntology_type() != null) {
                                baseFields.put(EntityFeaturePojo.ontology_type_, entFeature.getOntology_type());
                            }
                        }

                        if (!_diagnosticMode) {
                            // Store the object
                            col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                        } else {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, not found: "
                                        + query.toString() + ": " + baseFields.toString());
                        }

                    } //(end first time this feature seen - globally)

                    if (null == cachedEnt) { // First time we've seen this locally, so add to cache
                        currCache.addCachedEntityFeature(entFeature);
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out
                                        .println("EntityAggregationUtils.updateEntityFeatures, added to cache: "
                                                + entFeature.toDb());
                        }
                    } //TESTED (by hand)                     

                } //(end if community id assigned)
            } catch (Exception e) {
                // Exception, remove from feature list
                it.remove();

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }

        } // (end loop over communities)
    } // (end loop over indexes)

    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some ents were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getEntityCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" ent_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_ents=").append(entFeatures.size()).append(" new_ents=").append(numNewEntities)
                .append(" cache_misses=").append(numCacheMisses).append(" cache_hits=").append(numCacheHits)
                .append(" cache_size=").append(cacheSize);

        logger.info(logMsg.toString());
    }

}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java

License:Open Source License

public static void updateMatchingEntities(EntityFeaturePojo entFeature, ObjectId communityId) {
    String index = entFeature.getIndex();
    long totalFreq = entFeature.getTotalfreq();
    long docCount = entFeature.getDoccount();

    try {
        DBCollection docDb = DbManager.getDocument().getMetadata();

        BasicDBObject query1 = new BasicDBObject();
        query1.put(EntityPojo.docQuery_index_, index);
        query1.put(DocumentPojo.communityId_, communityId);

        BasicDBObject multiopB = new BasicDBObject();
        multiopB.put(EntityPojo.docUpdate_totalfrequency_, totalFreq);
        multiopB.put(EntityPojo.docUpdate_doccount_, docCount);
        BasicDBObject multiopA = new BasicDBObject(MongoDbManager.set_, multiopB);

        if (_diagnosticMode) {
            if (_logInDiagnosticMode)
                System.out.println("EntityAggregationUtils.updateMatchingEntities: " + query1.toString() + " / "
                        + multiopA.toString());
        } else {
            synchronized (GenericProcessingController.class) {
                // Because this op can be slow, and traverse a lot of disk, need to ensure that
                // we don't allow all the threads to hammer it at once (the updates all yield to each other
                // enough that the disk goes totally crazy)

                docDb.update(query1, multiopA, false, true);
                DbManager.getDocument().getLastError(DbManager.getDocument().getMetadata().getName());
                // (enforce consecutive accesses for this potentially very slow operation)
            }

            // Was originally checked updatedExisting but for INF-1406, it sometimes seemed to be 
            // checking the wrong command. I suspect the reason we had this code in here has gone away,
            // and it doesn't matter if this update occasionally fails anyway, it will just be out of date
            // so the check/retry has been removed.
        }
    } catch (Exception ex) {
        logger.error(ex.getMessage(), ex);
    }
}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java

License:Open Source License

public static void synchronizeEntityFeature(EntityFeaturePojo entityFeature, ObjectId communityId) {
    DBCollection entityFeatureDb = DbManager.getFeature().getEntity();

    // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" the entity

    if (_diagnosticMode || (null != entityFeature.getDbSyncTime())
            || (null != entityFeature.getDb_sync_prio())) {
        // Else this is a new feature so don't need to update the feature DB, only the index (if db_sync_prio null then have to update to avoid b/g aggregation loop)
        // (note that db_sync_prio will in practice not be set when this is a new feature because it will have same sync_doccount as doc_count)

        long nCurrTime = System.currentTimeMillis();
        //(query from top of the function, basically lookup on gaz_index)
        BasicDBObject update2 = new BasicDBObject();
        update2.put(EntityFeaturePojo.db_sync_time_, Long.toString(nCurrTime));
        update2.put(EntityFeaturePojo.db_sync_doccount_, entityFeature.getDoccount());
        BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2);
        BasicDBObject update3 = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1);
        update.put(MongoDbManager.unset_, update3);
        BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex());
        query.put(EntityFeaturePojo.communityId_, communityId);

        if (_diagnosticMode) {
            if ((null != entityFeature.getDbSyncTime()) || (null != entityFeature.getDb_sync_prio())) {
                if (_logInDiagnosticMode)
                    System.out.println("EntityAggregationUtils.synchronizeEntityFeature, featureDB: "
                            + query.toString() + " / " + update.toString());
            } else {
                if (_logInDiagnosticMode)
                    System.out.println(
                            "(WOULD NOT RUN) EntityAggregationUtils.synchronizeEntityFeature, featureDB: "
                                    + query.toString() + " / " + update.toString());
            }
        } else {
            entityFeatureDb.update(query, update, false, true);
        }
    }

    if (_diagnosticMode) {
        if (_logInDiagnosticMode)
            System.out.println("EntityAggregationUtils.synchronizeEntityFeature, synchronize: "
                    + new StringBuffer(entityFeature.getIndex()).append(':').append(communityId).toString()
                    + " = " + IndexManager.mapToIndex(entityFeature, new EntityFeaturePojoIndexMap()));
    } else {
        ElasticSearchManager esm = IndexManager.getIndex(EntityFeaturePojoIndexMap.indexName_);
        esm.addDocument(entityFeature, new EntityFeaturePojoIndexMap(), null, true);
        //(_id is set by the index map to index:communityId)
    }
}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java

License:Open Source License

public static void markEntityFeatureForSync(EntityFeaturePojo entityFeature, ObjectId communityId) {
    DBCollection entityFeatureDb = DbManager.getFeature().getEntity();
    double dPrio = 100.0 * (double) entityFeature.getDoccount()
            / (0.01 + (double) entityFeature.getDbSyncDoccount());
    entityFeature.setDb_sync_prio(dPrio);
    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entityFeature.getIndex());
    query.put(EntityFeaturePojo.communityId_, communityId);
    BasicDBObject update = new BasicDBObject(MongoDbManager.set_,
            new BasicDBObject(EntityFeaturePojo.db_sync_prio_, dPrio));
    if (_diagnosticMode) {
        if (_logInDiagnosticMode)
            System.out.println("EntityAggregationUtils.markEntityFeatureForSynchronization, featureDB: "
                    + query.toString() + " / " + update.toString());
    } else {
        entityFeatureDb.update(query, update, false, true);
    }
}