Example usage for com.mongodb BasicDBObject getObjectId

List of usage examples for com.mongodb BasicDBObject getObjectId

Introduction

On this page you can find example usage for com.mongodb BasicDBObject getObjectId.

Prototype

public ObjectId getObjectId(final String field) 

Document

Returns the object id or null if not set.
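
As a quick orientation before the usage examples below, here is a minimal, self-contained sketch of the call (the class and field names are illustrative only, not taken from the examples that follow):

import com.mongodb.BasicDBObject;
import org.bson.types.ObjectId;

public class GetObjectIdExample {
    public static void main(String[] args) {
        BasicDBObject doc = new BasicDBObject("_id", ObjectId.get()).append("name", "example");

        // Returns the value of "_id" cast to ObjectId.
        ObjectId id = doc.getObjectId("_id");
        System.out.println("id = " + id);

        // If the field is not set, getObjectId returns null rather than throwing.
        ObjectId missing = doc.getObjectId("parentId");
        System.out.println("missing = " + missing); // prints "missing = null"

        // If the field holds a value that is not an ObjectId, the cast inside
        // getObjectId fails with a ClassCastException.

        // A two-argument overload, getObjectId(field, default), returns the supplied
        // default instead of null (see the createAlertDocSummary example below).
    }
}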

Usage

From source file:com.deftlabs.lock.mongo.impl.LockDao.java

License:Apache License

/**
 * Try and get the lock. If unable to do so, this returns null.
 */
static synchronized ObjectId lock(final MongoClient pMongo, final String pLockName,
        final DistributedLockSvcOptions pSvcOptions, final DistributedLockOptions pLockOptions) {
    try {
        // Lookup the lock object.
        BasicDBObject lockDoc = findById(pMongo, pLockName, pSvcOptions);

        final long serverTime = getServerTime(pMongo, pSvcOptions);
        final long startTime = System.currentTimeMillis();

        // The doc was not there so we are going to try and insert a new doc.
        if (lockDoc == null) {
            final ObjectId lockId = tryInsertNew(pMongo, pLockName, pSvcOptions, pLockOptions, serverTime,
                    startTime);
            if (lockId != null)
                return lockId;
        }

        if (lockDoc == null)
            lockDoc = findById(pMongo, pLockName, pSvcOptions);

        // Get the state.
        final LockState lockState = LockState.findByCode(lockDoc.getString(LockDef.STATE.field));

        final ObjectId currentLockId = lockDoc.getObjectId(LockDef.LOCK_ID.field);

        // If it is unlocked, then try and lock.
        if (lockState.isUnlocked()) {
            final ObjectId lockId = tryLockingExisting(pMongo, pLockName, currentLockId, pSvcOptions,
                    pLockOptions, serverTime, startTime);
            if (lockId != null)
                return lockId;
        }

        final ObjectId lockId = (ObjectId) lockDoc.get(LockDef.LOCK_ID.field);

        // Could not get the lock.
        incrementLockAttemptCount(pMongo, pLockName, lockId, pSvcOptions);

        return null;

    } finally {
    }
}

From source file:com.deftlabs.lock.mongo.impl.LockDao.java

License:Apache License

private static ObjectId tryLockingExisting(final MongoClient pMongo, final String pLockName,
        final ObjectId pCurrentLockId, final DistributedLockSvcOptions pSvcOptions,
        final DistributedLockOptions pLockOptions, final long pServerTime, final long pStartTime) {
    final long adjustTime = System.currentTimeMillis() - pStartTime;

    final long serverTime = pServerTime + adjustTime;
    final Date now = new Date(serverTime);

    final ObjectId lockId = ObjectId.get();

    final BasicDBObject query = new BasicDBObject(LockDef.ID.field, pLockName);
    query.put(LockDef.LOCK_ID.field, pCurrentLockId);
    query.put(LockDef.STATE.field, LockState.UNLOCKED.code());

    final BasicDBObject toSet = new BasicDBObject();
    toSet.put(LockDef.LIBRARY_VERSION.field, pSvcOptions.getLibVersion());
    toSet.put(LockDef.UPDATED.field, now);
    toSet.put(LockDef.LAST_HEARTBEAT.field, now);
    toSet.put(LockDef.LOCK_ACQUIRED_TIME.field, now);
    toSet.put(LockDef.LOCK_TIMEOUT_TIME.field, new Date((serverTime + pLockOptions.getInactiveLockTimeout())));
    toSet.put(LockDef.LOCK_ID.field, lockId);
    toSet.put(LockDef.STATE.field, LockState.LOCKED.code());
    toSet.put(LockDef.OWNER_APP_NAME.field, pSvcOptions.getAppName());
    toSet.put(LockDef.OWNER_ADDRESS.field, pSvcOptions.getHostAddress());
    toSet.put(LockDef.OWNER_HOSTNAME.field, pSvcOptions.getHostname());
    toSet.put(LockDef.OWNER_THREAD_ID.field, Thread.currentThread().getId());
    toSet.put(LockDef.OWNER_THREAD_NAME.field, Thread.currentThread().getName());
    toSet.put(LockDef.OWNER_THREAD_GROUP_NAME.field, Thread.currentThread().getThreadGroup().getName());
    toSet.put(LockDef.LOCK_ATTEMPT_COUNT.field, 0);
    toSet.put(LockDef.INACTIVE_LOCK_TIMEOUT.field, pLockOptions.getInactiveLockTimeout());

    // Try and modify the existing lock.
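    // (findAndModify is atomic: the update applies only if the doc still matches the
    // UNLOCKED state and expected lock id; comparing the returned LOCK_ID, read via
    // getObjectId, against our new lockId confirms whether we won the race.)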
    final BasicDBObject lockDoc = (BasicDBObject) getDbCollection(pMongo, pSvcOptions).findAndModify(query,
            new BasicDBObject(LockDef.LOCK_ID.field, 1), null, false, new BasicDBObject(SET, toSet), true,
            false);

    if (lockDoc == null)
        return null;
    if (!lockDoc.containsField(LockDef.LOCK_ID.field))
        return null;

    final ObjectId returnedLockId = lockDoc.getObjectId(LockDef.LOCK_ID.field);
    if (returnedLockId == null)
        return null;
    if (!returnedLockId.equals(lockId))
        return null;

    if (pSvcOptions.getEnableHistory()) {
        LockHistoryDao.insert(pMongo, pLockName, pSvcOptions, pLockOptions, serverTime, LockState.LOCKED,
                lockId, false);
    }

    // Yay... we have the lock.
    return lockId;
}

From source file:com.fpt.xml.hth.db.lib.converter.MovieTheaterSessionConverter.java

/**
 * Converts a BasicDBObject to a MovieTheaterSessionDTO object.
 *
 * @param object
 * @return MovieTheaterSessionDTO
 */
//TODO: remove
public MovieTheaterSessionDTO convertBasicObjectToModel(BasicDBObject object) {
    MovieTheaterSessionDTO dto = new MovieTheaterSessionDTO();
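    // getObjectId reads the document's _id as an ObjectId (null if the field is not set).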
    ObjectId id = object.getObjectId("_id");
    BasicDBObject basicMovie = (BasicDBObject) object.get("movie");
    BasicDBList basicLstSession = (BasicDBList) object.get("theaters");
    //convert basicMovie to a MovieDB model
    MovieDB movie = movieConverter.convertBasicObjectToModel(basicMovie);
    //convert basicLstSession to theaters
    List<TheaterSessionDTO> theaters = new ArrayList<TheaterSessionDTO>();
    if (basicLstSession != null && !basicLstSession.isEmpty()) {
        for (int i = 0; i < basicLstSession.size(); i++) {
            BasicDBObject basic = (BasicDBObject) basicLstSession.get(i);
            TheaterSessionDTO theaterSessionDTO = tsConverter.convertBasicObjectToModel(basic);
            theaters.add(theaterSessionDTO);
        }
    }

    //set values on the MovieTheaterSessionDTO
    dto.setId(id);
    dto.setMovie(movie);
    dto.setTheaters(theaters);
    return dto;
}

From source file:com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine.java

License:Open Source License

public static void simpleDocCache(FederatedRequest request, BasicDBObject doc) {
    if (null != request.cachedDoc_expired) {
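        // Prefer the expired cached doc's updateId; getObjectId returns null when the
        // field is absent, in which case we fall back to the doc's _id below.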
        ObjectId updateId = request.cachedDoc_expired.getObjectId(DocumentPojo.updateId_);
        if (null != updateId) {
            doc.put(DocumentPojo.updateId_, updateId);
        } else {
            doc.put(DocumentPojo.updateId_, request.cachedDoc_expired.getObjectId(DocumentPojo._id_));
        }
        BasicDBObject docUpdate = new BasicDBObject(DocumentPojo.url_, doc.getString(DocumentPojo.url_));
        docUpdate.put(DocumentPojo.sourceKey_, doc.getString(DocumentPojo.sourceKey_));
        DbManager.getDocument().getMetadata().remove(docUpdate);

        //DEBUG
        if (_DEBUG)
            _logger.debug("DEB: postQA4a: re-cached ... " + docUpdate.toString() + ": "
                    + doc.getObjectId(DocumentPojo.updateId_));
    } else if (null == request.cachedDoc) { // if no currently cached doc, simply save what we have
        //DEBUG
        if (_DEBUG)
            _logger.debug("DEB: postQA4b: cached ... " + doc);
        DbManager.getDocument().getMetadata().save(doc);
    }
    // (else already have a valid cached doc so nothing to do)
}

From source file:com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine.java

License:Open Source License

public static void handleComplexDocCaching(FederatedRequest request, boolean cacheMode,
        ScoringUtils scoreStats) {

    List<DocumentPojo> docWrapper = request.complexSourceProcResults;

    // In non-test mode: go through the list of docs and work out what to do about caching, ie remove docs + add update ids
    //Also go through and set default scores for any entities that haven't been scored based on existing docs
    BasicDBObject query = new BasicDBObject(DocumentPojo.sourceKey_,
            request.endpointInfo.parentSource.getKey());
    BasicDBObject fields = new BasicDBObject(DocumentPojo.updateId_, 1); // (ie _id and updateId only)
    String srcUrl = null;
    for (DocumentPojo outDoc : docWrapper) {
        if (null == srcUrl) {
            srcUrl = outDoc.getSourceUrl();
        }
        // Always make the text non-transient, so gets stored
        outDoc.makeFullTextNonTransient();
        if (null == outDoc.getId()) {
            outDoc.setId(new ObjectId());
        }

        if (cacheMode && (null == request.cachedResult)) { // (if result not previously cached)
            // Step 1: deduplication
            query.put(DocumentPojo.url_, outDoc.getUrl());
            BasicDBObject outVal = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fields);
            if (null != outVal) {
                //DEBUG
                if (_DEBUG)
                    _logger.debug("DEB: HCDC1: update cache from : " + outVal + " for " + outDoc.getUrl());

                // Use updateId if it exists, otherwise _id
                ObjectId updateId = outVal.getObjectId(DocumentPojo.updateId_);
                if (null == updateId) {
                    updateId = outVal.getObjectId(DocumentPojo._id_);
                }
                outDoc.setUpdateId(updateId);
            }
        } //TESTED (by hand - single and multiple docs mode)

        // Step 2: add fake scores to all the entities that didn't get scores from the aggregation manager
        if (null != outDoc.getEntities())
            for (EntityPojo ent : outDoc.getEntities()) {
                boolean fakeStats = true;
                if (null != scoreStats) {
                    if (scoreStats.fillInEntityStatistics(ent)) {
                        fakeStats = false;
                    }
                }
                if (fakeStats) {
                    ent.setDoccount(1L);
                    ent.setTotalfrequency(1L);
                    ent.setDatasetSignificance(10.0);
                    ent.setSignificance(10.0);
                    ent.setQueryCoverage(100.0);
                }
                //DEBUG
                if (_DEBUG)
                    _logger.debug(
                            "DEB: HCDC2: entity: " + ent.getIndex() + " , sig=" + ent.getDatasetSignificance());
            } //TESTED

        if (null != outDoc.getAssociations())
            for (AssociationPojo assoc : outDoc.getAssociations()) {
                assoc.setAssoc_sig(10.0);
                assoc.setDoccount(1L);
            }
    } //TESTED (by hand - overlapping and non-overlapping case)

    if (cacheMode && (null == request.cachedResult)) { // (if result not previously cached)
        //Remove old docs now we have new ones
        DbManager.getDocument().getMetadata().remove(query); // remove everything with this specific URL (ie simple source)
        query.remove(DocumentPojo.url_);
        query.put(DocumentPojo.sourceUrl_, srcUrl);
        DbManager.getDocument().getMetadata().remove(query); // remove everything with this specific _source_ URL (ie docs generated from this URL)

        // Now cache all the existing docs:

        @SuppressWarnings("unchecked")
        ArrayList<Object> tmpDocList = (ArrayList<Object>) DocumentPojo.listToDb(docWrapper,
                DocumentPojo.listType());

        DbManager.getDocument().getMetadata().insert(tmpDocList.toArray(new BasicDBObject[0]));

        //DEBUG
        if (_DEBUG)
            _logger.debug("DEB: HCDC3: remove/insert cache: " + query.toString());
    } //TESTED (by hand - single and multiple docs)

}

From source file:com.ikanow.infinit.e.harvest.extraction.document.file.InternalInfiniteFile.java

License:Open Source License

public InternalInfiniteFile(String url, NtlmPasswordAuthentication auth) throws MalformedURLException {
    try {
        ObjectId locationId = null;

        ObjectId ownerId = null;
        String communityIdsStr = null;

        if (url.startsWith(INFINITE_SHARE_PREFIX)) {
            _isShare = true;
            locationId = new ObjectId(url.substring(INFINITE_SHARE_PREFIX_LEN).replaceFirst("/.*$", "")); // remove trailing /s, can be used for information
            //TESTED (2.1, 2.2.1, 2.3)

            BasicDBObject query = new BasicDBObject(SharePojo._id_, locationId);
            _resultObj = (BasicDBObject) MongoDbManager.getSocial().getShare().findOne(query);
            if (null == _resultObj) {
                throw new MalformedURLException("Not found (or not authorized): " + url);
            } //TESTED (7.1)
            String mediaType = (String) _resultObj.get(SharePojo.mediaType_);
            if ((null != mediaType) && (mediaType.equalsIgnoreCase("application/x-zip-compressed")
                    || mediaType.equalsIgnoreCase("application/zip"))) {
                _isDirectory = true;
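                // binaryId_ identifies the GridFS file that holds the zipped share content.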
                ObjectId fileId = _resultObj.getObjectId(SharePojo.binaryId_);

                GridFSRandomAccessFile file = new GridFSRandomAccessFile(
                        MongoDbManager.getSocial().getShareBinary(), fileId);
                _zipView = new GridFSZipFile(_resultObj.getString(SharePojo.title_), file);
            } //TESTED (3.1)
            else { // Single share
                if (_resultObj.containsField(SharePojo.documentLocation_)) {
                    throw new MalformedURLException("Reference shares are not currently supported");
                } //TESTED (0.1)

                _isDirectory = false; // (this will get traversed as the initial "directory", which doesn't check isDirectory...
                //... and will return itself as a single file in the "directory")
            } //TESTED (1.1, 2.1, 3.1)
        } //TESTED 
        else if (url.startsWith(INFINITE_CUSTOM_PREFIX)) {
            _isCustom = true;
            _isDirectory = true;
            BasicDBObject query = null;
            String locationStr = url.substring(INFINITE_CUSTOM_PREFIX_LEN).replaceFirst("/.*$", "");
            StringBuffer sb = new StringBuffer(INFINITE_CUSTOM_PREFIX);
            try {
                locationId = new ObjectId(locationStr);
                query = new BasicDBObject(CustomMapReduceJobPojo._id_, locationId);
            } //TESTED (4.1)
            catch (Exception e) { // for custom jobs can also specify the job name
                query = new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, locationStr);
            } //TESTED (5.1, 6.1)
            _resultObj = (BasicDBObject) MongoDbManager.getCustom().getLookup().findOne(query);
            if (null == _resultObj) {
                throw new MalformedURLException("Not found (or not authorized): " + url);
            } //TESTED (7.2, 7.3)
            if (null != locationId) {
                sb.append(locationStr).append('/')
                        .append(_resultObj.getString(CustomMapReduceJobPojo.jobtitle_)).append('/');
            } //TESTED (5.1, 6.1)
            else {
                sb.append(_resultObj.getObjectId(CustomMapReduceJobPojo._id_).toString()).append('/')
                        .append(locationStr).append('/');
            } //TESTED (4.1)         
            _originalUrl = sb.toString();
            _isCustomAppend = _resultObj.getBoolean(CustomMapReduceJobPojo.appendResults_, false);

            String outputDatabase = _resultObj.getString(CustomMapReduceJobPojo.outputDatabase_);
            String outputCollection = _resultObj.getString(CustomMapReduceJobPojo.outputCollection_);
            if (null == outputDatabase) {
                outputDatabase = "custommr";
            }
            DBCollection outColl = MongoDbManager.getCollection(outputDatabase, outputCollection);
            BasicDBObject sort = null;
            if (_isCustomAppend) { // Use time of _last_ record as file time
                sort = new BasicDBObject("_id", -1);
            } else { // Use time of _first_ record as file time
                sort = new BasicDBObject("_id", 1);
            } //TESTED
            DBCursor dbc = outColl.find().sort(sort).limit(1);
            List<DBObject> firstOrLastRecordList = dbc.toArray();
            if (!firstOrLastRecordList.isEmpty()) {
                _overwriteTime = ((ObjectId) firstOrLastRecordList.iterator().next().get("_id")).getTime();
            } else { // No records, use lastRunTime_ as backup
                _overwriteTime = _resultObj.getDate(CustomMapReduceJobPojo.lastRunTime_, new Date()).getTime();
            } //TOTEST

        } //TESTED
        else {
            throw new MalformedURLException("Not recognized: " + url);
        } //TESTED (7.4)
        communityIdsStr = auth.getDomain();
        ownerId = new ObjectId(auth.getUsername());

        // Now do some authentication:
        // Check communities first since that involves no external DB queries:
        boolean isAuthorized = false;
        if (_isShare) {
            BasicDBList communities = (BasicDBList) _resultObj.get(SharePojo.communities_);
            for (Object communityObj : communities) {
                BasicDBObject communityDbo = (BasicDBObject) communityObj;
                ObjectId commId = communityDbo.getObjectId("_id");
                if (communityIdsStr.contains(commId.toString())) {
                    isAuthorized = true;
                    break;
                }
            }
        } //TESTED (7.*)
        else { //_isCustom
            BasicDBList communities = (BasicDBList) _resultObj.get(CustomMapReduceJobPojo.communityIds_);
            for (Object communityObj : communities) {
                ObjectId commId = (ObjectId) communityObj;
                if (communityIdsStr.equals(commId)) {
                    isAuthorized = true;
                    break;
                }
            }
        } //TESTED (7.*)
        if (!isAuthorized) { // Still OK ... only if user is an admin
            isAuthorized = AuthUtils.isAdmin(ownerId);
        } //TESTED (1,2,3,4,5,6)
        if (!isAuthorized) { // Permission fail
            throw new MalformedURLException("Not found (or not authorized): " + url);
        } //TESTED (7.5)
    } catch (Exception e) {
        throw new MalformedURLException("Invalid authentication or location: " + e.getMessage());
    } //(just passed exceptions on)
      // Save original URL
    if (_isShare) { // (custom handled above)
        if (!url.endsWith("/")) {
            _originalUrl = url + "/";
        } else {
            _originalUrl = url;
        }

    } //(TESTED 1.3, 2.3, 3.3)

}

From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomSavedQueryQueueLauncher.java

License:Open Source License

@SuppressWarnings("unchecked")
public static void executeQuery(DocumentQueueControlPojo savedQuery) {

    if (null == savedQuery._parentShare) {
        return;
    }

    AdvancedQueryPojo query = savedQuery.getQueryInfo().getQuery();

    // 1) append a time term as an extra query term (unless it's the first time)

    if (null != savedQuery.getLastDocIdInserted()) {
        long lastRun = savedQuery.getLastDocIdInserted().getTime();
        if (null != savedQuery.getQueryInfo().getLastRun()) {
            long altLastRun = savedQuery.getQueryInfo().getLastRun().getTime();
            if (altLastRun < lastRun) { // pick the longest duration
                lastRun = altLastRun;
            }
        }
        lastRun = ((new Date().getTime() - lastRun) / 1000L + 3599L) / 3600L; // (hours rounded up)

        if (lastRun < (14L * 24L)) { // if it's more than 14 days then query over everything            

            if (null == query.qt) {
                query.qt = new ArrayList<AdvancedQueryPojo.QueryTermPojo>(1);
            }
            AdvancedQueryPojo.QueryTermPojo extraTermTime = new AdvancedQueryPojo.QueryTermPojo();
            extraTermTime.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
            extraTermTime.time.max = "now+1d"; // (ie now plus some margin)
            if (savedQuery.getQueryInfo()
                    .getFrequency() == DocumentQueueControlPojo.SavedQueryInfo.DocQueueFrequency.Hourly) {
                extraTermTime.time.min = "now-" + (lastRun + 1) + "h";
                //extraTermTime.time.min = "now-2h"; // (just add some margin)
            } else if (savedQuery.getQueryInfo()
                    .getFrequency() == DocumentQueueControlPojo.SavedQueryInfo.DocQueueFrequency.Daily) {
                extraTermTime.time.min = "now-" + (lastRun + 6) + "h";
                //extraTermTime.time.min = "now-30h"; // (just add some margin)            
            } else if (savedQuery.getQueryInfo()
                    .getFrequency() == DocumentQueueControlPojo.SavedQueryInfo.DocQueueFrequency.Weekly) {
                lastRun = (lastRun + 23L) / 24L;
                extraTermTime.time.min = "now-" + (lastRun + 1) + "d";
                //extraTermTime.time.min = "now-8d"; // (just add some margin)                     
            }
            query.qt.add(extraTermTime);

            if (null != query.logic) { // else terms ANDed together, ie what I want
                query.logic = "(" + query.logic + ") AND " + query.qt.size();
            }
        }
    } //TESTED (test3abc)

    // 2) other minor mods to the query engine (because there's lots we don't care about)

    if (null == query.output) {
        query.output = new AdvancedQueryPojo.QueryOutputPojo();
        if (null == query.output.docs) {
            query.output.docs = new AdvancedQueryPojo.QueryOutputPojo.DocumentOutputPojo();
        }
    }
    if (null == query.score) {
        query.score = new AdvancedQueryPojo.QueryScorePojo();
    }
    if (null == query.input) {
        query.input = new AdvancedQueryPojo.QueryInputPojo();
    }
    query.output.aggregation = null; // (no aggregations)
    query.output.docs.ents = false;
    query.output.docs.events = false;
    query.output.docs.facts = false;
    query.output.docs.summaries = false;
    query.output.docs.eventsTimeline = false;
    query.output.docs.metadata = false;
    if (null == query.output.docs.numReturn) {
        query.output.docs.numReturn = 100; // (default)
    }
    if (null == query.score.numAnalyze) {
        query.score.numAnalyze = 1000; // (default)
    }
    //TESTED (entire block)

    // 3) run saved query:

    QueryHandler queryHandler = new QueryHandler();

    StringBuffer errorString = new StringBuffer();
    StringBuffer communityIdStrList = new StringBuffer();
    for (ObjectId commId : savedQuery.getQueryInfo().getQuery().communityIds) {
        if (communityIdStrList.length() > 0) {
            communityIdStrList.append(',');
        }
        communityIdStrList.append(commId.toString());
    } //TESTED

    try {
        //DEBUG
        //System.out.println("COMMS="+communityIdStrList.toString() + ": QUERY=" + query.toApi());

        // (should have a version of this that just returns the IPs from the index engine)
        // (for now this will do)
        ResponsePojo rp = queryHandler.doQuery(savedQuery._parentShare.getOwner().get_id().toString(), query,
                communityIdStrList.toString(), errorString);

        if (null == rp) {
            throw new RuntimeException(errorString.toString()); // (handled below)
        }

        // 4) Add the results to the original data

        SharePojo savedQueryShare = SharePojo.fromDb(DbManager.getSocial().getShare()
                .findOne(new BasicDBObject(SharePojo._id_, savedQuery._parentShare.get_id())), SharePojo.class);

        if (null != savedQueryShare) {
            DocumentQueueControlPojo toModify = DocumentQueueControlPojo.fromApi(savedQueryShare.getShare(),
                    DocumentQueueControlPojo.class);
            List<BasicDBObject> docs = (List<BasicDBObject>) rp.getData();
            if ((null != docs) && !docs.isEmpty()) {
                if (null == toModify.getQueueList()) {
                    toModify.setQueueList(new ArrayList<ObjectId>(docs.size()));
                }
                ObjectId ignoreBeforeId = toModify.getLastDocIdInserted();
                ObjectId maxDocId = toModify.getLastDocIdInserted();

                //DEBUG
                //System.out.println("before, num docs=" + toModify.getQueueList().size() + " adding " + docs.size() + " from " + ignoreBeforeId);

                // Some alerting preamble
                StringBuffer alertText = null;
                StringBuffer alertTitle = null;
                String rootUrl = new PropertiesManager().getURLRoot().replace("/api/", "");
                int maxDocsToAdd = 10; // (default)
                boolean alert = false;
                if ((null != toModify.getQueryInfo().getAlert())
                        && (null != toModify.getQueryInfo().getAlert().getEmailAddresses())
                        && !toModify.getQueryInfo().getAlert().getEmailAddresses().isEmpty()) {
                    alert = true;
                    alertText = new StringBuffer();
                    if (null != toModify.getQueryInfo().getAlert().getMaxDocsToInclude()) {
                        maxDocsToAdd = toModify.getQueryInfo().getAlert().getMaxDocsToInclude();
                        if (maxDocsToAdd < 0) {
                            maxDocsToAdd = Integer.MAX_VALUE;
                        }
                    }
                    createAlertPreamble(alertText, toModify.getQueryInfo().getQuery(),
                            savedQuery._parentShare.get_id(), rootUrl);
                } //TESTED

                // Add new docs...

                int numDocsAdded = 0;
                for (BasicDBObject doc : docs) {
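                    // getObjectId returns null if the result doc has no _id, hence the guard below.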
                    ObjectId docId = doc.getObjectId(DocumentPojo._id_);
                    if (null != docId) {
                        if (null != ignoreBeforeId) {
                            if (docId.compareTo(ignoreBeforeId) <= 0) { // ie docId <= ignoreBeforeId
                                continue;
                            }
                        } //(end check if this doc has already been seen)                     

                        toModify.getQueueList().add(0, docId);

                        //Alerting
                        if (alert) {
                            // (this fn checks if the max number of docs have been added):
                            createAlertDocSummary(alertText, numDocsAdded, maxDocsToAdd, doc, rootUrl);
                            numDocsAdded++;
                        }

                        if (null == maxDocId) {
                            maxDocId = docId;
                        } else if (maxDocId.compareTo(docId) < 0) { // ie maxDocId < docId
                            maxDocId = docId;
                        }
                    } //TESTED (test5)
                } //(end loop over new docs)

                // More alerting
                if (alert && (numDocsAdded > 0)) {
                    alertTitle = new StringBuffer("IKANOW: Queue \"").append(toModify.getQueueName())
                            .append("\" has ").append(numDocsAdded).append(" new");
                    if (numDocsAdded == 1) {
                        alertTitle.append(" document.");
                    } else {
                        alertTitle.append(" documents.");
                    }
                    // (terminate the doc list)
                    if (maxDocsToAdd > 0) {
                        alertText.append("</ol>");
                        alertText.append("\n");
                    }

                    String to = (Arrays
                            .toString(toModify.getQueryInfo().getAlert().getEmailAddresses().toArray())
                            .replaceAll("[\\[\\]]", "")).replace(',', ';');
                    try {
                        new SendMail(null, to, alertTitle.toString(), alertText.toString()).send("text/html");
                    } catch (Exception e) {
                        //DEBUG
                        //e.printStackTrace();
                    }
                } //TESTED

                // Remove old docs...

                int maxDocs = query.output.docs.numReturn;
                if (null != toModify.getMaxDocs()) { // override
                    maxDocs = toModify.getMaxDocs();
                }

                if (toModify.getQueueList().size() > maxDocs) {
                    toModify.setQueueList(toModify.getQueueList().subList(0, maxDocs));
                } //TESTED (test2.2)

                //DEBUG
                //System.out.println("after, num docs=" + toModify.getQueueList().size() + " at " + maxDocId);

                // Update share info:
                toModify.setLastDocIdInserted(maxDocId);

                // We've modified the share so update it:
                savedQueryShare.setShare(toModify.toApi());
                savedQueryShare.setModified(new Date());
                DbManager.getSocial().getShare().save(savedQueryShare.toDb());

            } //(end found some docs)

        } //(end found share)

    } catch (Exception e) {
        _logger.info("knowledge/query userid=" + savedQuery._parentShare.getOwner().get_id() + " groups="
                + communityIdStrList + " error=" + e.getMessage());
    }
}

From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomSavedQueryQueueLauncher.java

License:Open Source License

public static void createAlertDocSummary(StringBuffer alertEmailText, int docNum, int numDocSummaries,
        BasicDBObject doc, String rootUrl) {
    if (docNum < numDocSummaries) {
        // Preamble on the first doc
        if (0 == docNum) {
            alertEmailText.append("<p>");
            alertEmailText.append("Top ").append(numDocSummaries);
            if (1 == numDocSummaries) {
                alertEmailText.append(" document:");
            } else {
                alertEmailText.append(" documents:");
            }
            alertEmailText.append("</p>");
            alertEmailText.append("\n");
            alertEmailText.append("<ol>");
            alertEmailText.append("\n");
        }
        // Docs:         
        StringBuffer guiQuery = new StringBuffer("{\"qt\":[{\"ftext\":\"_id:")
                .append(doc.getObjectId(DocumentPojo._id_)).append("\"}]}");
        String url = doc.getString(DocumentPojo.displayUrl_, doc.getString(DocumentPojo.url_));
        String title = doc.getString(DocumentPojo.title_, url);
        alertEmailText.append("<li/>");
        alertEmailText.append(title);
        alertEmailText.append(" [");
        alertEmailText.append(doc.getDate(DocumentPojo.publishedDate_, doc.getDate(DocumentPojo.created_)));
        alertEmailText.append("]");
        alertEmailText.append(" (");
        alertEmailText.append("<a href=\"").append(rootUrl);
        try {
            alertEmailText.append("?query=");
            alertEmailText.append(URLEncoder.encode(guiQuery.toString(), "UTF-8"));
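            // getObjectId(field, default) returns the supplied default when the field is absent.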
            alertEmailText.append("&communityIds=").append(
                    doc.getObjectId(DocumentPojo.communityId_, new ObjectId("4c927585d591d31d7b37097a")));
        } catch (Exception e) {
        } // (just carry on)
        alertEmailText.append("\">");
        alertEmailText.append("GUI</a>)");
        if ((null != url) && (url.startsWith("http"))) {
            alertEmailText.append(" (");
            alertEmailText.append("<a href=\"").append(url).append("\">");
            alertEmailText.append("External Link</a>)");
        }
        alertEmailText.append("\n");
    }
}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AggregationManager.java

License:Open Source License

public static void updateDocEntitiesFromDeletedDocuments(String uuid) {
    String outCollection = new StringBuilder(uuid).append("_AggregationUtils").toString();
    try {
        PropertiesManager props = new PropertiesManager();
        if (props.getAggregationDisabled()) { // (no need to do this)
            return;
        }
        DBCollection outColl = DbManager.getDB("doc_metadata").getCollection(outCollection);

        DBCursor dbc = outColl.find();
        for (DBObject dbo : dbc) {
            BasicDBObject entityEl = (BasicDBObject) dbo;
            BasicDBObject entityVal = (BasicDBObject) entityEl.get("value");

            long nDocDecrement = entityVal.getLong("dc");
            long nFreqDecrement = entityVal.getLong("f");
            long nCurrFreq = entityVal.getLong("tf");
            long nCurrDocCount = entityVal.getLong("tdc");

            // (These are by construction the lowest values so this will provide some defence against going -ve)
            if (nDocDecrement > nCurrDocCount) {
                nDocDecrement = nCurrDocCount;
            }
            if (nFreqDecrement > nCurrFreq) {
                nFreqDecrement = nCurrFreq;
            }

            BasicDBObject entityId = (BasicDBObject) entityEl.get("_id");
            ObjectId commId = null;
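            // "comm" may hold a non-ObjectId value; getObjectId would throw a
            // ClassCastException on a mismatch, so type-check before using it.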
            Object commObj = entityId.get("comm");
            if (commObj instanceof ObjectId) {
                commId = entityId.getObjectId("comm");
            }
            String index = (String) entityId.get("index");
            if ((null == index) || (null == commId)) {
                continue; // random error
            }

            BasicDBObject updateQuery = new BasicDBObject(EntityFeaturePojo.index_, index);
            updateQuery.put(EntityFeaturePojo.communityId_, commId);
            BasicDBObject entityUpdate1 = new BasicDBObject(EntityFeaturePojo.doccount_, -nDocDecrement);
            entityUpdate1.put(EntityFeaturePojo.totalfreq_, -nFreqDecrement);
            BasicDBObject entityUpdate = new BasicDBObject(DbManager.inc_, entityUpdate1);

            if (_diagnosticMode) {
                if (_logInDiagnosticMode)
                    System.out.println("UPDATE FEATURE DATABASE: " + updateQuery.toString() + "/"
                            + entityUpdate.toString());
            } else {
                DbManager.getFeature().getEntity().update(updateQuery, entityUpdate);
                // (can be a single query because the query is on index, the shard)
            }
            //TESTED

            if ((nDocDecrement < nCurrDocCount) && (nDocDecrement * 10 > nCurrDocCount)) {
                // ie there are some documents left
                // and the doc count has shifted by more than 10%
                BasicDBObject updateQuery2 = new BasicDBObject(EntityPojo.docQuery_index_, index);
                updateQuery2.put(DocumentPojo.communityId_, commId);
                BasicDBObject entityUpdate2_1 = new BasicDBObject(EntityPojo.docUpdate_doccount_,
                        nCurrDocCount - nDocDecrement);
                entityUpdate2_1.put(EntityPojo.docUpdate_totalfrequency_, nCurrFreq - nFreqDecrement);
                BasicDBObject entityUpdate2 = new BasicDBObject(DbManager.set_, entityUpdate2_1);

                if (_diagnosticMode) {
                    if (_logInDiagnosticMode)
                        System.out.println("UPDATE DOC DATABASE: " + updateQuery2.toString() + "/"
                                + entityUpdate2.toString());
                } else {
                    DbManager.getDocument().getMetadata().update(updateQuery2, entityUpdate2, false, true);
                }
            }
        } //TESTED (including when to update logic above)
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Tidy up
    DbManager.getDB("doc_metadata").getCollection(outCollection).drop();
}

From source file:com.ikanow.infinit.e.utility.MongoDocumentTxfer.java

License:Apache License

private void doTransfer(BasicDBObject query, int nSkip, int nLimit, boolean bAggregate, BasicDBObject chunk)
        throws IOException {
    PropertiesManager pm = new PropertiesManager();
    int nMaxContentSize_bytes = pm.getMaxContentSize();

    // Initialize the DB:

    DBCollection docsDB = DbManager.getDocument().getMetadata();
    DBCollection contentDB = DbManager.getDocument().getContent();
    DBCollection sourcesDB = DbManager.getIngest().getSource();

    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // 1. Get the documents from the DB (combining data + metadata and refreshing source meta)

    // (Ignore soft-deleted records:)
    if (null == query) {
        query = new BasicDBObject();
    }
    Object sourceKeyQueryTerm = query.remove(DocumentPojo.sourceKey_);
    if (null != sourceKeyQueryTerm) {
        if (query.toString()
                .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException(
                    "Can't specify sourceKey as part of complex query term: " + query.toString());
        } //TESTED (by hand, "{ \"sourceKey\": \"x\", \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        if (sourceKeyQueryTerm instanceof String) {
            query.put(DocumentPojo.sourceKey_,
                    SourcePojo.getDistributedKeyQueryTerm((String) sourceKeyQueryTerm));
        } //TESTED (by hand, "{\"sourceKey\": \"feeds.arstechnica.com.arstechnica.index.11.2.\" }")
        else if (sourceKeyQueryTerm instanceof DBObject) { // find all the _sources_ matching this term, and convert to a big list including distribution
            BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
            fields.put(SourcePojo.highestDistributionFactorStored_, 1);
            DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.key_, sourceKeyQueryTerm), fields);
            LinkedList<String> sourceKeys = new LinkedList<String>();
            for (DBObject dbo : dbc) {
                String key = (String) dbo.get(SourcePojo.key_);
                Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_);
                Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key, distributionFactor);
                sourceKeys.addAll(sourceKeysForSource);
            }
            query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));
        } //TESTED (by hand, "{\"sourceKey\": { \"$gt\": \"dev.ikanow\" } }")
        else {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //(actually not possible, just included here for mathematical completeness...)         
    } else {
        if (query.toString()
                .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //TESTED (by hand, "{ \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        // Optimize communityId into sourceKeys...
        if (null != query.get(DocumentPojo.communityId_)) {
            try {
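                // getObjectId throws ClassCastException if communityId_ is not a plain
                // ObjectId (e.g. a complex $in clause), dropping us into the catch below.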
                ObjectId commId = query.getObjectId(DocumentPojo.communityId_);
                BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
                fields.put(SourcePojo.highestDistributionFactorStored_, 1);
                DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.communityIds_, commId), fields);
                LinkedList<String> sourceKeys = new LinkedList<String>();
                int added = 0;
                for (DBObject dbo : dbc) {
                    String key = (String) dbo.get(SourcePojo.key_);
                    Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_);
                    Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key,
                            distributionFactor);
                    sourceKeys.addAll(sourceKeysForSource);
                    added += sourceKeysForSource.size();
                }
                query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));

                System.out.println("(Optimized simple community query to " + added + " source key(s))");
            } catch (Exception e) {
                //DEBUG
                //e.printStackTrace();

                System.out.println("(Can't optimize complex community query: " + e.getMessage());
            }
        } //TESTED (by hand - including distributed source version)
    }
    // Ignored delete objects
    Object urlQuery = query.get(DocumentPojo.url_);
    if (null == urlQuery) {
        query.put(DocumentPojo.url_, Pattern.compile("^[^?]")); // (ie nothing starting with ?)
    } //TESTED
    else if (urlQuery instanceof BasicDBObject) {
        ((BasicDBObject) urlQuery).append("$regex", "^[^?]");
    } //TESTED
      //DEBUG
      //System.out.println("COMBINED QUERY= " + query.toString());

    // If aggregating, kick off the background aggregation thread
    if (bAggregate) {
        EntityBackgroundAggregationManager.startThread();
        AssociationBackgroundAggregationManager.startThread();
    }

    //Debug:
    DBCursor dbc = null;
    dbc = docsDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    byte[] storageArray = new byte[200000];

    int nSynced = 0;
    LinkedList<DocumentPojo> docsToTransfer = new LinkedList<DocumentPojo>();
    Map<ObjectId, LinkedList<DocumentPojo>> communityList = null;
    ObjectId currCommunityId = null;
    while (dbc.hasNext()) {
        BasicDBObject dbo = (BasicDBObject) dbc.next();
        DocumentPojo doc = DocumentPojo.fromDb(dbo, DocumentPojo.class);
        String sDocIndex = doc.getIndex();
        if (null == sDocIndex) {
            sDocIndex = "document_index";
        }
        if ((null != _deletedIndex) && !_deletedIndex.contains(sDocIndex)) {
            _deletedIndex.add(sDocIndex);
            rebuildIndex(sDocIndex);
            try { // (Just in case the index requires some time to sort itself out)
                Thread.sleep(1000);
            } catch (InterruptedException e) {
            }
        }

        //Debug:
        //System.out.println("Getting content..." + feed.getTitle() + " / " + feed.getUrl());

        // Get the content:
        if ((0 != nMaxContentSize_bytes)
                && StoreAndIndexManager.docHasExternalContent(doc.getUrl(), doc.getSourceUrl())) {
            BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, doc.getUrl());
            contentQ.put(CompressedFullTextPojo.sourceKey_,
                    new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, doc.getSourceKey())));
            BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1);
            fields.put(CompressedFullTextPojo.sourceKey_, 1);

            DBCursor dbcGzip = contentDB.find(contentQ, fields);
            while (dbcGzip.hasNext()) {
                BasicDBObject dboContent = (BasicDBObject) dbcGzip.next();
                if (!dboContent.containsField(CompressedFullTextPojo.sourceKey_)) {
                    // If this has another version then ignore this one...
                    if (dbcGzip.hasNext()) {
                        continue;
                    } //TESTED (by hand)               
                }

                byte[] compressedData = ((byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_));
                ByteArrayInputStream in = new ByteArrayInputStream(compressedData);
                GZIPInputStream gzip = new GZIPInputStream(in);
                int nRead = 0;
                StringBuffer output = new StringBuffer();
                while (nRead >= 0) {
                    nRead = gzip.read(storageArray, 0, 200000);
                    if (nRead > 0) {
                        String s = new String(storageArray, 0, nRead, "UTF-8");
                        output.append(s);
                    }
                }
                doc.setFullText(output.toString());
            }
        }
        // (else document has full text already)

        // Get tags, if necessary:
        // Always overwrite tags - one of the reasons we might choose to migrate
        // Also may need source in order to support source index filtering
        SourcePojo src = _sourceCache.get(doc.getSourceKey());
        if (null == src) {
            //TODO (INF-2265): handle search index settings in pipeline mode... (also didn't seem to work?)
            BasicDBObject srcDbo = (BasicDBObject) sourcesDB
                    .findOne(new BasicDBObject(SourcePojo.key_, doc.getSourceKey()));
            if (null != srcDbo) {
                src = SourcePojo.fromDb(srcDbo, SourcePojo.class);

                if (null != src.getProcessingPipeline()) {
                    try {
                        // Set the index settings
                        HarvestController hc = new HarvestController();
                        HarvestControllerPipeline hcPipe = new HarvestControllerPipeline();
                        hcPipe.extractSource_preProcessingPipeline(src, hc);
                    } catch (Exception e) {
                        //DEBUG
                        e.printStackTrace();
                    }
                } //TESTED (by hand)

                _sourceCache.put(doc.getSourceKey(), src);
            }
        }
        doc.setTempSource(src); // (needed for source index filtering)
        if (null != src) {
            if (null != src.getTags()) {
                Set<String> tagsTidied = new TreeSet<String>();
                for (String s : src.getTags()) {
                    String ss = s.trim().toLowerCase();
                    tagsTidied.add(ss);
                }

                // May also want to write this back to the DB:
                //TODO (INF-2223): Handle append tags or not in the pipeline...
                if ((null == src.getAppendTagsToDocs()) || src.getAppendTagsToDocs()) {
                    if ((null == doc.getTags()) || (doc.getTags().size() < tagsTidied.size())) {
                        BasicDBObject updateQuery = new BasicDBObject(DocumentPojo.sourceKey_,
                                doc.getRawSourceKey()); // (ie including the # if there is one)
                        updateQuery.put(DocumentPojo._id_, doc.getId());
                        docsDB.update(updateQuery,
                                new BasicDBObject(DbManager.addToSet_, new BasicDBObject(DocumentPojo.tags_,
                                        new BasicDBObject(DbManager.each_, tagsTidied))));
                    }
                    doc.setTags(tagsTidied); // (just copy ptr across)
                }
            }
        }

        // 2. Update the index with the new document            

        // (Optionally also update entity and assoc features)

        if (bAggregate) {
            if (null == currCommunityId) {
                currCommunityId = doc.getCommunityId();
            } else if (!currCommunityId.equals(doc.getCommunityId())) {
                LinkedList<DocumentPojo> perCommunityDocList = null;
                if (null == communityList) { // (very first time we see > 1 community)
                    communityList = new TreeMap<ObjectId, LinkedList<DocumentPojo>>();
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    perCommunityDocList.addAll(docsToTransfer); //(NOT including doc, this hasn't been added to docsToTransfer yet)
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                currCommunityId = doc.getCommunityId();
                perCommunityDocList = communityList.get(currCommunityId);
                if (null == perCommunityDocList) {
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                perCommunityDocList.add(doc);
            }
        } //TESTED

        nSynced++;
        docsToTransfer.add(doc);
        if (0 == (nSynced % 10000)) {
            StoreAndIndexManager manager = new StoreAndIndexManager();

            if (bAggregate) {
                // Loop over communities and aggregate each one then store the modified entities/assocs               
                doAggregation(communityList, docsToTransfer);
                communityList = null; // (in case the next 10,000 docs are all in the same community!)
                currCommunityId = null;

            } //TOTEST            

            manager.addToSearch(docsToTransfer);
            docsToTransfer.clear();
            System.out.println("(Synced " + nSynced + " records)");
        }

    } // (End loop over docs)

    // Sync remaining docs

    if (!docsToTransfer.isEmpty()) {
        if (bAggregate) {
            // Loop over communities and aggregate each one then store the modified entities/assocs               
            doAggregation(communityList, docsToTransfer);
        }

        StoreAndIndexManager manager = new StoreAndIndexManager();
        manager.addToSearch(docsToTransfer);
    }

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }

    if (bAggregate) {
        System.out.println("Completed. You can hit CTRL+C at any time.");
        System.out.println(
                "By default it will keep running for 5 minutes while the background aggregation runs to update the documents' entities.");
        try {
            Thread.sleep(300000);
        } catch (InterruptedException e) {
        }

        // Turn off so we can exit
        EntityBackgroundAggregationManager.stopThreadAndWait();
        AssociationBackgroundAggregationManager.stopThreadAndWait();
    }
}