List of usage examples for com.mongodb BasicDBObject getObjectId
public ObjectId getObjectId(final String field)
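`getObjectId` is inherited from org.bson.BasicBSONObject: it simply casts the stored value, so it returns null when the field is absent and throws ClassCastException when the value is not an ObjectId. Before the examples, a minimal, self-contained sketch of the call itself, using the legacy driver API; the host, database, collection, and field names are illustrative, not from any of the sources below:

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.MongoClient;
import org.bson.types.ObjectId;

public class GetObjectIdExample {
    public static void main(String[] args) {
        MongoClient mongo = new MongoClient("localhost", 27017); // hypothetical connection
        DBCollection coll = mongo.getDB("test").getCollection("docs");

        BasicDBObject doc = (BasicDBObject) coll.findOne();
        if (doc != null) {
            // Returns null if "_id" is missing; throws ClassCastException if it is not an ObjectId
            ObjectId id = doc.getObjectId("_id");
            if (id != null) {
                System.out.println("id=" + id + ", created(ms)=" + id.getTime());
            }
        }
        mongo.close();
    }
}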
From source file:com.deftlabs.lock.mongo.impl.LockDao.java
License:Apache License
/**
 * Try and get the lock. If unable to do so, this returns null.
 */
static synchronized ObjectId lock(final MongoClient pMongo, final String pLockName,
        final DistributedLockSvcOptions pSvcOptions, final DistributedLockOptions pLockOptions) {
    try {
        // Lookup the lock object.
        BasicDBObject lockDoc = findById(pMongo, pLockName, pSvcOptions);

        final long serverTime = getServerTime(pMongo, pSvcOptions);
        final long startTime = System.currentTimeMillis();

        // The doc was not there so we are going to try and insert a new doc.
        if (lockDoc == null) {
            final ObjectId lockId
                    = tryInsertNew(pMongo, pLockName, pSvcOptions, pLockOptions, serverTime, startTime);
            if (lockId != null)
                return lockId;
        }

        if (lockDoc == null)
            lockDoc = findById(pMongo, pLockName, pSvcOptions);

        // Get the state.
        final LockState lockState = LockState.findByCode(lockDoc.getString(LockDef.STATE.field));

        final ObjectId currentLockId = lockDoc.getObjectId(LockDef.LOCK_ID.field);

        // If it is unlocked, then try and lock.
        if (lockState.isUnlocked()) {
            final ObjectId lockId = tryLockingExisting(pMongo, pLockName, currentLockId, pSvcOptions,
                    pLockOptions, serverTime, startTime);
            if (lockId != null)
                return lockId;
        }

        final ObjectId lockId = (ObjectId) lockDoc.get(LockDef.LOCK_ID.field);

        // Could not get the lock.
        incrementLockAttemptCount(pMongo, pLockName, lockId, pSvcOptions);

        return null;
    } finally {
    }
}
From source file:com.deftlabs.lock.mongo.impl.LockDao.java
License:Apache License
private static ObjectId tryLockingExisting(final MongoClient pMongo, final String pLockName,
        final ObjectId pCurrentLockId, final DistributedLockSvcOptions pSvcOptions,
        final DistributedLockOptions pLockOptions, final long pServerTime, final long pStartTime) {
    final long adjustTime = System.currentTimeMillis() - pStartTime;

    final long serverTime = pServerTime + adjustTime;
    final Date now = new Date(serverTime);

    final ObjectId lockId = ObjectId.get();

    final BasicDBObject query = new BasicDBObject(LockDef.ID.field, pLockName);
    query.put(LockDef.LOCK_ID.field, pCurrentLockId);
    query.put(LockDef.STATE.field, LockState.UNLOCKED.code());

    final BasicDBObject toSet = new BasicDBObject();
    toSet.put(LockDef.LIBRARY_VERSION.field, pSvcOptions.getLibVersion());
    toSet.put(LockDef.UPDATED.field, now);
    toSet.put(LockDef.LAST_HEARTBEAT.field, now);
    toSet.put(LockDef.LOCK_ACQUIRED_TIME.field, now);
    toSet.put(LockDef.LOCK_TIMEOUT_TIME.field, new Date(serverTime + pLockOptions.getInactiveLockTimeout()));
    toSet.put(LockDef.LOCK_ID.field, lockId);
    toSet.put(LockDef.STATE.field, LockState.LOCKED.code());
    toSet.put(LockDef.OWNER_APP_NAME.field, pSvcOptions.getAppName());
    toSet.put(LockDef.OWNER_ADDRESS.field, pSvcOptions.getHostAddress());
    toSet.put(LockDef.OWNER_HOSTNAME.field, pSvcOptions.getHostname());
    toSet.put(LockDef.OWNER_THREAD_ID.field, Thread.currentThread().getId());
    toSet.put(LockDef.OWNER_THREAD_NAME.field, Thread.currentThread().getName());
    toSet.put(LockDef.OWNER_THREAD_GROUP_NAME.field, Thread.currentThread().getThreadGroup().getName());
    toSet.put(LockDef.LOCK_ATTEMPT_COUNT.field, 0);
    toSet.put(LockDef.INACTIVE_LOCK_TIMEOUT.field, pLockOptions.getInactiveLockTimeout());

    // Try and modify the existing lock.
    final BasicDBObject lockDoc = (BasicDBObject) getDbCollection(pMongo, pSvcOptions).findAndModify(query,
            new BasicDBObject(LockDef.LOCK_ID.field, 1), null, false, new BasicDBObject(SET, toSet), true,
            false);

    if (lockDoc == null)
        return null;
    if (!lockDoc.containsField(LockDef.LOCK_ID.field))
        return null;

    final ObjectId returnedLockId = lockDoc.getObjectId(LockDef.LOCK_ID.field);
    if (returnedLockId == null)
        return null;
    if (!returnedLockId.equals(lockId))
        return null;

    if (pSvcOptions.getEnableHistory()) {
        LockHistoryDao.insert(pMongo, pLockName, pSvcOptions, pLockOptions, serverTime, LockState.LOCKED,
                lockId, false);
    }

    // Yay... we have the lock.
    return lockId;
}
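Both LockDao snippets above reduce to the same primitive: findAndModify used as an atomic compare-and-set, where the query matches the expected current state and the update installs the new owner, so at most one contender can win the race. A stripped-down sketch of that pattern; the collection layout and field names here are illustrative, not the LockDao schema:

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import org.bson.types.ObjectId;

public class CasLockSketch {
    /** Returns the new lock id on success, or null if someone else holds the lock. */
    static ObjectId tryLock(DBCollection locks, String lockName) {
        ObjectId newLockId = ObjectId.get();

        // Match only if the lock is currently in the "unlocked" state...
        BasicDBObject query = new BasicDBObject("_id", lockName).append("state", "unlocked");

        // ...and atomically flip it to "locked" with a fresh lock id.
        BasicDBObject update = new BasicDBObject("$set",
                new BasicDBObject("state", "locked").append("lockId", newLockId));

        // findAndModify(query, fields, sort, remove, update, returnNew, upsert)
        BasicDBObject result = (BasicDBObject) locks.findAndModify(query,
                new BasicDBObject("lockId", 1), null, false, update, true, false);

        // No document matched (null), or the returned lockId isn't ours: we lost the race.
        if (result == null || !newLockId.equals(result.getObjectId("lockId"))) {
            return null;
        }
        return newLockId;
    }
}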
From source file:com.fpt.xml.hth.db.lib.converter.MovieTheaterSessionConverter.java
/**
 * To convert from a BasicDBObject to a MovieTheaterSessionDTO object
 *
 * @param object
 * @return MovieTheaterSessionDTO
 */
//TODO: remove
public MovieTheaterSessionDTO convertBasicObjectToModel(BasicDBObject object) {
    MovieTheaterSessionDTO dto = new MovieTheaterSessionDTO();
    ObjectId id = object.getObjectId("_id");
    BasicDBObject basicMovie = (BasicDBObject) object.get("movie");
    BasicDBList basicLstSession = (BasicDBList) object.get("theaters");
    // convert basicMovie to a movie object
    MovieDB movie = movieConverter.convertBasicObjectToModel(basicMovie);
    // convert basicLstSession to theaters
    List<TheaterSessionDTO> theaters = new ArrayList<TheaterSessionDTO>();
    if (basicLstSession != null && !basicLstSession.isEmpty()) {
        for (int i = 0; i < basicLstSession.size(); i++) {
            BasicDBObject basic = (BasicDBObject) basicLstSession.get(i);
            TheaterSessionDTO theaterSessionDTO = tsConverter.convertBasicObjectToModel(basic);
            theaters.add(theaterSessionDTO);
        }
    }
    // set values on the MovieTheaterSessionDTO
    dto.setId(id);
    dto.setMovie(movie);
    dto.setTheaters(theaters);
    return dto;
}
From source file:com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine.java
License:Open Source License
public static void simpleDocCache(FederatedRequest request, BasicDBObject doc) {
    if (null != request.cachedDoc_expired) {
        ObjectId updateId = request.cachedDoc_expired.getObjectId(DocumentPojo.updateId_);
        if (null != updateId) {
            doc.put(DocumentPojo.updateId_, updateId);
        } else {
            doc.put(DocumentPojo.updateId_, request.cachedDoc_expired.getObjectId(DocumentPojo._id_));
        }
        BasicDBObject docUpdate = new BasicDBObject(DocumentPojo.url_, doc.getString(DocumentPojo.url_));
        docUpdate.put(DocumentPojo.sourceKey_, doc.getString(DocumentPojo.sourceKey_));
        DbManager.getDocument().getMetadata().remove(docUpdate);

        //DEBUG
        if (_DEBUG)
            _logger.debug("DEB: postQA4a: re-cached ... " + docUpdate.toString() + ": "
                    + doc.getObjectId(DocumentPojo.updateId_));
    } else if (null == request.cachedDoc) { // if no currently cached doc, simply save what we have
        //DEBUG
        if (_DEBUG)
            _logger.debug("DEB: postQA4b: cached ... " + doc);

        DbManager.getDocument().getMetadata().save(doc);
    }
    // (else already have a valid cached doc so nothing to do)
}
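The updateId-or-_id fallback above can also be collapsed using the two-argument overload getObjectId(String field, ObjectId def) from org.bson.BasicBSONObject, which returns the default when the field is missing. A minimal equivalent, reusing the example's own names:

// Equivalent fallback via the two-argument overload:
// use updateId_ if present, otherwise fall back to the document's _id.
ObjectId updateId = request.cachedDoc_expired.getObjectId(DocumentPojo.updateId_,
        request.cachedDoc_expired.getObjectId(DocumentPojo._id_));
doc.put(DocumentPojo.updateId_, updateId);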
From source file:com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine.java
License:Open Source License
public static void handleComplexDocCaching(FederatedRequest request, boolean cacheMode,
        ScoringUtils scoreStats) {
    List<DocumentPojo> docWrapper = request.complexSourceProcResults;

    // In non-test mode... Go through the list of docs and work out what the deal is with caching,
    // ie remove docs + add update ids.
    // Also go through and set default scores for any entities that haven't been scored based on existing docs.

    BasicDBObject query = new BasicDBObject(DocumentPojo.sourceKey_,
            request.endpointInfo.parentSource.getKey());
    BasicDBObject fields = new BasicDBObject(DocumentPojo.updateId_, 1); // (ie _id and updateId only)
    String srcUrl = null;
    for (DocumentPojo outDoc : docWrapper) {
        if (null == srcUrl) {
            srcUrl = outDoc.getSourceUrl();
        }

        // Always make the text non-transient, so gets stored
        outDoc.makeFullTextNonTransient();

        if (null == outDoc.getId()) {
            outDoc.setId(new ObjectId());
        }

        if (cacheMode && (null == request.cachedResult)) { // (if result not previously cached)
            // Step 1: deduplication
            query.put(DocumentPojo.url_, outDoc.getUrl());
            BasicDBObject outVal = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query,
                    fields);
            if (null != outVal) {
                //DEBUG
                if (_DEBUG)
                    _logger.debug("DEB: HCDC1: update cache from : " + outVal + " for " + outDoc.getUrl());

                // Use updateId if it exists, otherwise _id
                ObjectId updateId = outVal.getObjectId(DocumentPojo.updateId_);
                if (null == updateId) {
                    updateId = outVal.getObjectId(DocumentPojo._id_);
                }
                outDoc.setUpdateId(updateId);
            }
        } //TESTED (by hand - single and multiple docs mode)

        // Step 2: add fake scores to all the entities that didn't get scores from the aggregation manager
        if (null != outDoc.getEntities())
            for (EntityPojo ent : outDoc.getEntities()) {
                boolean fakeStats = true;
                if (null != scoreStats) {
                    if (scoreStats.fillInEntityStatistics(ent)) {
                        fakeStats = false;
                    }
                }
                if (fakeStats) {
                    ent.setDoccount(1L);
                    ent.setTotalfrequency(1L);
                    ent.setDatasetSignificance(10.0);
                    ent.setSignificance(10.0);
                    ent.setQueryCoverage(100.0);
                }
                //DEBUG
                if (_DEBUG)
                    _logger.debug("DEB: HCDC2: entity: " + ent.getIndex() + " , sig="
                            + ent.getDatasetSignificance());
            } //TESTED

        if (null != outDoc.getAssociations())
            for (AssociationPojo assoc : outDoc.getAssociations()) {
                assoc.setAssoc_sig(10.0);
                assoc.setDoccount(1L);
            }
    } //TESTED (by hand - overlapping and non-overlapping case)

    if (cacheMode && (null == request.cachedResult)) { // (if result not previously cached)
        // Remove old docs now we have new ones
        DbManager.getDocument().getMetadata().remove(query);
        // remove everything with this specific URL (ie simple source)
        query.remove(DocumentPojo.url_);
        query.put(DocumentPojo.sourceUrl_, srcUrl);
        // remove everything with this specific _source_ URL (ie docs generated from this URL)
        DbManager.getDocument().getMetadata().remove(query);

        // Now cache all the existing docs:
        @SuppressWarnings("unchecked")
        ArrayList<Object> tmpDocList = (ArrayList<Object>) DocumentPojo.listToDb(docWrapper,
                DocumentPojo.listType());
        DbManager.getDocument().getMetadata().insert(tmpDocList.toArray(new BasicDBObject[0]));

        //DEBUG
        if (_DEBUG)
            _logger.debug("DEB: HCDC3: remove/insert cache: " + query.toString());
    } //TESTED (by hand - single and multiple docs)
}
From source file:com.ikanow.infinit.e.harvest.extraction.document.file.InternalInfiniteFile.java
License:Open Source License
public InternalInfiniteFile(String url, NtlmPasswordAuthentication auth) throws MalformedURLException {
    try {
        ObjectId locationId = null;
        ObjectId ownerId = null;
        String communityIdsStr = null;
        if (url.startsWith(INFINITE_SHARE_PREFIX)) {
            _isShare = true;
            locationId = new ObjectId(url.substring(INFINITE_SHARE_PREFIX_LEN).replaceFirst("/.*$", ""));
            // remove trailing /s, can be used for information
            //TESTED (2.1, 2.2.1, 2.3)

            BasicDBObject query = new BasicDBObject(SharePojo._id_, locationId);
            _resultObj = (BasicDBObject) MongoDbManager.getSocial().getShare().findOne(query);
            if (null == _resultObj) {
                throw new MalformedURLException("Not found (or not authorized): " + url);
            } //TESTED (7.1)

            String mediaType = (String) _resultObj.get(SharePojo.mediaType_);
            if ((null != mediaType) && (mediaType.equalsIgnoreCase("application/x-zip-compressed")
                    || mediaType.equalsIgnoreCase("application/zip"))) {
                _isDirectory = true;

                ObjectId fileId = _resultObj.getObjectId(SharePojo.binaryId_);
                GridFSRandomAccessFile file = new GridFSRandomAccessFile(
                        MongoDbManager.getSocial().getShareBinary(), fileId);
                _zipView = new GridFSZipFile(_resultObj.getString(SharePojo.title_), file);
            } //TESTED (3.1)
            else { // Single share
                if (_resultObj.containsField(SharePojo.documentLocation_)) {
                    throw new MalformedURLException("Reference shares are not currently supported");
                } //TESTED (0.1)

                _isDirectory = false;
                // (this will get traversed as the initial "directory", which doesn't check isDirectory...
                //  ... and will return itself as a single file in the "directory")
            } //TESTED (1.1, 2.1, 3.1)
        } //TESTED
        else if (url.startsWith(INFINITE_CUSTOM_PREFIX)) {
            _isCustom = true;
            _isDirectory = true;
            BasicDBObject query = null;
            String locationStr = url.substring(INFINITE_CUSTOM_PREFIX_LEN).replaceFirst("/.*$", "");
            StringBuffer sb = new StringBuffer(INFINITE_CUSTOM_PREFIX);
            try {
                locationId = new ObjectId(locationStr);
                query = new BasicDBObject(CustomMapReduceJobPojo._id_, locationId);
            } //TESTED (4.1)
            catch (Exception e) { // for custom jobs can also specify the job name
                query = new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, locationStr);
            } //TESTED (5.1, 6.1)
            _resultObj = (BasicDBObject) MongoDbManager.getCustom().getLookup().findOne(query);
            if (null == _resultObj) {
                throw new MalformedURLException("Not found (or not authorized): " + url);
            } //TESTED (7.2, 7.3)

            if (null != locationId) {
                sb.append(locationStr).append('/')
                        .append(_resultObj.getString(CustomMapReduceJobPojo.jobtitle_)).append('/');
            } //TESTED (5.1, 6.1)
            else {
                sb.append(_resultObj.getObjectId(CustomMapReduceJobPojo._id_).toString()).append('/')
                        .append(locationStr).append('/');
            } //TESTED (4.1)
            _originalUrl = sb.toString();

            _isCustomAppend = _resultObj.getBoolean(CustomMapReduceJobPojo.appendResults_, false);

            String outputDatabase = _resultObj.getString(CustomMapReduceJobPojo.outputDatabase_);
            String outputCollection = _resultObj.getString(CustomMapReduceJobPojo.outputCollection_);
            if (null == outputDatabase) {
                outputDatabase = "custommr";
            }
            DBCollection outColl = MongoDbManager.getCollection(outputDatabase, outputCollection);
            BasicDBObject sort = null;
            if (_isCustomAppend) { // Use time of _last_ record as file time
                sort = new BasicDBObject("_id", -1);
            } else { // Use time of _first_ record as file time
                sort = new BasicDBObject("_id", 1);
            } //TESTED
            DBCursor dbc = outColl.find().sort(sort).limit(1);
            List<DBObject> firstOrLastRecordList = dbc.toArray();
            if (!firstOrLastRecordList.isEmpty()) {
                _overwriteTime = ((ObjectId) firstOrLastRecordList.iterator().next().get("_id")).getTime();
            } else { // No records, use lastRunTime_ as backup
                _overwriteTime = _resultObj.getDate(CustomMapReduceJobPojo.lastRunTime_, new Date()).getTime();
            } //TOTEST
        } //TESTED
        else {
            throw new MalformedURLException("Not recognized: " + url);
        } //TESTED (7.4)

        communityIdsStr = auth.getDomain();
        ownerId = new ObjectId(auth.getUsername());

        // Now do some authentication:
        // Check communities first since that involves no external DB queries:
        boolean isAuthorized = false;
        if (_isShare) {
            BasicDBList communities = (BasicDBList) _resultObj.get(SharePojo.communities_);
            for (Object communityObj : communities) {
                BasicDBObject communityDbo = (BasicDBObject) communityObj;
                ObjectId commId = communityDbo.getObjectId("_id");
                if (communityIdsStr.contains(commId.toString())) {
                    isAuthorized = true;
                    break;
                }
            }
        } //TESTED (7.*)
        else { //_isCustom
            BasicDBList communities = (BasicDBList) _resultObj.get(CustomMapReduceJobPojo.communityIds_);
            for (Object communityObj : communities) {
                ObjectId commId = (ObjectId) communityObj;
                if (communityIdsStr.contains(commId.toString())) {
                    isAuthorized = true;
                    break;
                }
            }
        } //TESTED (7.*)
        if (!isAuthorized) { // Still OK ... only if user is an admin
            isAuthorized = AuthUtils.isAdmin(ownerId);
        } //TESTED (1,2,3,4,5,6)
        if (!isAuthorized) { // Permission fail
            throw new MalformedURLException("Not found (or not authorized): " + url);
        } //TESTED (7.5)
    } catch (Exception e) {
        throw new MalformedURLException("Invalid authentication or location: " + e.getMessage());
    } //(just passed exceptions on)

    // Save original URL
    if (_isShare) { // (custom handled above)
        if (!url.endsWith("/")) {
            _originalUrl = url + "/";
        } else {
            _originalUrl = url;
        }
    } //(TESTED 1.3, 2.3, 3.3)
}
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomSavedQueryQueueLauncher.java
License:Open Source License
@SuppressWarnings("unchecked") public static void executeQuery(DocumentQueueControlPojo savedQuery) { if (null == savedQuery._parentShare) { return;/*ww w. ja v a2s. com*/ } AdvancedQueryPojo query = savedQuery.getQueryInfo().getQuery(); // 1) append the a time as an extra query term (unless it's the first time) if (null != savedQuery.getLastDocIdInserted()) { long lastRun = savedQuery.getLastDocIdInserted().getTime(); if (null != savedQuery.getQueryInfo().getLastRun()) { long altLastRun = savedQuery.getQueryInfo().getLastRun().getTime(); if (altLastRun < lastRun) { // pick the longest duration lastRun = altLastRun; } } lastRun = ((new Date().getTime() - lastRun) / 1000L + 3599L) / 3600L; // (hours rounded up) if (lastRun < (14L * 24L)) { // if it's more than 14 days then query over everything if (null == query.qt) { query.qt = new ArrayList<AdvancedQueryPojo.QueryTermPojo>(1); } AdvancedQueryPojo.QueryTermPojo extraTermTime = new AdvancedQueryPojo.QueryTermPojo(); extraTermTime.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo(); extraTermTime.time.max = "now+1d"; // (ie now plus some margin) if (savedQuery.getQueryInfo() .getFrequency() == DocumentQueueControlPojo.SavedQueryInfo.DocQueueFrequency.Hourly) { extraTermTime.time.min = "now-" + (lastRun + 1) + "h"; //extraTermTime.time.min = "now-2h"; // (just add some margin) } else if (savedQuery.getQueryInfo() .getFrequency() == DocumentQueueControlPojo.SavedQueryInfo.DocQueueFrequency.Daily) { extraTermTime.time.min = "now-" + (lastRun + 6) + "h"; //extraTermTime.time.min = "now-30h"; // (just add some margin) } else if (savedQuery.getQueryInfo() .getFrequency() == DocumentQueueControlPojo.SavedQueryInfo.DocQueueFrequency.Weekly) { lastRun = (lastRun + 23L) / 24L; extraTermTime.time.min = "now-" + (lastRun + 1) + "d"; //extraTermTime.time.min = "now-8d"; // (just add some margin) } query.qt.add(extraTermTime); if (null != query.logic) { // else terms ANDed together, ie what I want query.logic = "(" + query.logic + ") AND " + query.qt.size(); } } } //TESTED (test3abc) // 2) other minor mods to the query engine (because there's lots we don't care about) if (null == query.output) { query.output = new AdvancedQueryPojo.QueryOutputPojo(); if (null == query.output.docs) { query.output.docs = new AdvancedQueryPojo.QueryOutputPojo.DocumentOutputPojo(); } } if (null == query.score) { query.score = new AdvancedQueryPojo.QueryScorePojo(); } if (null == query.input) { query.input = new AdvancedQueryPojo.QueryInputPojo(); } query.output.aggregation = null; // (no aggregations) query.output.docs.ents = false; query.output.docs.events = false; query.output.docs.facts = false; query.output.docs.summaries = false; query.output.docs.eventsTimeline = false; query.output.docs.metadata = false; if (null == query.output.docs.numReturn) { query.output.docs.numReturn = 100; // (default) } if (null == query.score.numAnalyze) { query.output.docs.numReturn = 1000; // (default) } //TESTED (entire block) // 3) run saved query: QueryHandler queryHandler = new QueryHandler(); StringBuffer errorString = new StringBuffer(); StringBuffer communityIdStrList = new StringBuffer(); for (ObjectId commId : savedQuery.getQueryInfo().getQuery().communityIds) { if (communityIdStrList.length() > 0) { communityIdStrList.append(','); } communityIdStrList.append(commId.toString()); } //TESTED try { //DEBUG //System.out.println("COMMS="+communityIdStrList.toString() + ": QUERY=" + query.toApi()); // (should have a version of this that just returns the IPs from the index engine) 
// (for now this will do) ResponsePojo rp = queryHandler.doQuery(savedQuery._parentShare.getOwner().get_id().toString(), query, communityIdStrList.toString(), errorString); if (null == rp) { throw new RuntimeException(errorString.toString()); // (handled below) } // 4) Add the results to the original data SharePojo savedQueryShare = SharePojo.fromDb(DbManager.getSocial().getShare() .findOne(new BasicDBObject(SharePojo._id_, savedQuery._parentShare.get_id())), SharePojo.class); if (null != savedQueryShare) { DocumentQueueControlPojo toModify = DocumentQueueControlPojo.fromApi(savedQueryShare.getShare(), DocumentQueueControlPojo.class); List<BasicDBObject> docs = (List<BasicDBObject>) rp.getData(); if ((null != docs) && !docs.isEmpty()) { if (null == toModify.getQueueList()) { toModify.setQueueList(new ArrayList<ObjectId>(docs.size())); } ObjectId ignoreBeforeId = toModify.getLastDocIdInserted(); ObjectId maxDocId = toModify.getLastDocIdInserted(); //DEBUG //System.out.println("before, num docs=" + toModify.getQueueList().size() + " adding " + docs.size() + " from " + ignoreBeforeId); // Some alerting preamble StringBuffer alertText = null; StringBuffer alertTitle = null; String rootUrl = new PropertiesManager().getURLRoot().replace("/api/", ""); int maxDocsToAdd = 10; // (default) boolean alert = false; if ((null != toModify.getQueryInfo().getAlert()) && (null != toModify.getQueryInfo().getAlert().getEmailAddresses()) && !toModify.getQueryInfo().getAlert().getEmailAddresses().isEmpty()) { alert = true; alertText = new StringBuffer(); if (null != toModify.getQueryInfo().getAlert().getMaxDocsToInclude()) { maxDocsToAdd = toModify.getQueryInfo().getAlert().getMaxDocsToInclude(); if (maxDocsToAdd < 0) { maxDocsToAdd = Integer.MAX_VALUE; } } createAlertPreamble(alertText, toModify.getQueryInfo().getQuery(), savedQuery._parentShare.get_id(), rootUrl); } //TESTED // Add new docs... int numDocsAdded = 0; for (BasicDBObject doc : docs) { ObjectId docId = doc.getObjectId(DocumentPojo._id_); if (null != docId) { if (null != ignoreBeforeId) { if (docId.compareTo(ignoreBeforeId) <= 0) { // ie docId <= ignoreBeforeId continue; } } //(end check if this doc has already been seen) toModify.getQueueList().add(0, docId); //Alerting if (alert) { // (this fn checks if the max number of docs have been added): createAlertDocSummary(alertText, numDocsAdded, maxDocsToAdd, doc, rootUrl); numDocsAdded++; } if (null == maxDocId) { maxDocId = docId; } else if (maxDocId.compareTo(docId) < 0) { // ie maxDocId < docId maxDocId = docId; } } //TESTED (test5) } //(end loop over new docs) // More alerting if (alert && (numDocsAdded > 0)) { alertTitle = new StringBuffer("IKANOW: Queue \"").append(toModify.getQueueName()) .append("\" has ").append(numDocsAdded).append(" new"); if (numDocsAdded == 1) { alertTitle.append(" document."); } else { alertTitle.append(" documents."); } // (terminate the doc list) if (maxDocsToAdd > 0) { alertText.append("</ol>"); alertText.append("\n"); } String to = (Arrays .toString(toModify.getQueryInfo().getAlert().getEmailAddresses().toArray()) .replaceAll("[\\[\\]]", "")).replace(',', ';'); try { new SendMail(null, to, alertTitle.toString(), alertText.toString()).send("text/html"); } catch (Exception e) { //DEBUG //e.printStackTrace(); } } //TESTED // Remove old docs... 
int maxDocs = query.output.docs.numReturn; if (null != toModify.getMaxDocs()) { // override maxDocs = toModify.getMaxDocs(); } if (toModify.getQueueList().size() > maxDocs) { toModify.setQueueList(toModify.getQueueList().subList(0, maxDocs)); } //TESTED (test2.2) //DEBUG //System.out.println("after, num docs=" + toModify.getQueueList().size() + " at " + maxDocId); // Update share info: toModify.setLastDocIdInserted(maxDocId); // We've modified the share so update it: savedQueryShare.setShare(toModify.toApi()); savedQueryShare.setModified(new Date()); DbManager.getSocial().getShare().save(savedQueryShare.toDb()); } //(end found some docs) } //(end found share) } catch (Exception e) { _logger.info("knowledge/query userid=" + savedQuery._parentShare.getOwner().get_id() + " groups=" + communityIdStrList + " error=" + e.getMessage()); } }
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomSavedQueryQueueLauncher.java
License:Open Source License
public static void createAlertDocSummary(StringBuffer alertEmailText, int docNum, int numDocSummaries,
        BasicDBObject doc, String rootUrl) {
    if (docNum < numDocSummaries) {
        // Preamble on the first doc
        if (0 == docNum) {
            alertEmailText.append("<p>");
            alertEmailText.append("Top ").append(numDocSummaries);
            if (1 == numDocSummaries) {
                alertEmailText.append(" document:");
            } else {
                alertEmailText.append(" documents:");
            }
            alertEmailText.append("</p>");
            alertEmailText.append("\n");
            alertEmailText.append("<ol>");
            alertEmailText.append("\n");
        }
        // Docs:
        StringBuffer guiQuery = new StringBuffer("{\"qt\":[{\"ftext\":\"_id:")
                .append(doc.getObjectId(DocumentPojo._id_)).append("\"}]}");
        String url = doc.getString(DocumentPojo.displayUrl_, doc.getString(DocumentPojo.url_));
        String title = doc.getString(DocumentPojo.title_, url);
        alertEmailText.append("<li/>");
        alertEmailText.append(title);
        alertEmailText.append(" [");
        alertEmailText.append(doc.getDate(DocumentPojo.publishedDate_, doc.getDate(DocumentPojo.created_)));
        alertEmailText.append("]");
        alertEmailText.append(" (");
        alertEmailText.append("<a href=\"").append(rootUrl);
        try {
            alertEmailText.append("?query=");
            alertEmailText.append(URLEncoder.encode(guiQuery.toString(), "UTF-8"));
            alertEmailText.append("&communityIds=").append(
                    doc.getObjectId(DocumentPojo.communityId_, new ObjectId("4c927585d591d31d7b37097a")));
        } catch (Exception e) {
        } // (just carry on)
        alertEmailText.append("\">");
        alertEmailText.append("GUI</a>)");
        if ((null != url) && (url.startsWith("http"))) {
            alertEmailText.append(" (");
            alertEmailText.append("<a href=\"").append(url).append("\">");
            alertEmailText.append("External Link</a>)");
        }
        alertEmailText.append("\n");
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.AggregationManager.java
License:Open Source License
public static void updateDocEntitiesFromDeletedDocuments(String uuid) {
    String outCollection = new StringBuilder(uuid).append("_AggregationUtils").toString();
    try {
        PropertiesManager props = new PropertiesManager();
        if (props.getAggregationDisabled()) { // (no need to do this)
            return;
        }
        DBCollection outColl = DbManager.getDB("doc_metadata").getCollection(outCollection);
        DBCursor dbc = outColl.find();
        for (DBObject dbo : dbc) {
            BasicDBObject entityEl = (BasicDBObject) dbo;
            BasicDBObject entityVal = (BasicDBObject) entityEl.get("value");
            long nDocDecrement = entityVal.getLong("dc");
            long nFreqDecrement = entityVal.getLong("f");
            long nCurrFreq = entityVal.getLong("tf");
            long nCurrDocCount = entityVal.getLong("tdc");
            // (These are by construction the lowest values so this will provide some defence against going -ve)
            if (nDocDecrement > nCurrDocCount) {
                nDocDecrement = nCurrDocCount;
            }
            if (nFreqDecrement > nCurrFreq) {
                nFreqDecrement = nCurrFreq;
            }
            BasicDBObject entityId = (BasicDBObject) entityEl.get("_id");
            ObjectId commId = null;
            Object commObj = entityId.get("comm");
            if (commObj instanceof ObjectId) {
                commId = entityId.getObjectId("comm");
            }
            String index = (String) entityId.get("index");
            if ((null == index) || (null == commId)) {
                continue; // random error
            }
            BasicDBObject updateQuery = new BasicDBObject(EntityFeaturePojo.index_, index);
            updateQuery.put(EntityFeaturePojo.communityId_, commId);
            BasicDBObject entityUpdate1 = new BasicDBObject(EntityFeaturePojo.doccount_, -nDocDecrement);
            entityUpdate1.put(EntityFeaturePojo.totalfreq_, -nFreqDecrement);
            BasicDBObject entityUpdate = new BasicDBObject(DbManager.inc_, entityUpdate1);
            if (_diagnosticMode) {
                if (_logInDiagnosticMode)
                    System.out.println("UPDATE FEATURE DATABASE: " + updateQuery.toString() + "/"
                            + entityUpdate.toString());
            } else {
                DbManager.getFeature().getEntity().update(updateQuery, entityUpdate);
                // (can be a single query because the query is on index, the shard)
            } //TESTED

            if ((nDocDecrement < nCurrDocCount) && (nDocDecrement * 10 > nCurrDocCount)) {
                // ie there are some documents left
                // and the doc count has shifted by more than 10%
                BasicDBObject updateQuery2 = new BasicDBObject(EntityPojo.docQuery_index_, index);
                updateQuery2.put(DocumentPojo.communityId_, commId);
                BasicDBObject entityUpdate2_1 = new BasicDBObject(EntityPojo.docUpdate_doccount_,
                        nCurrDocCount - nDocDecrement);
                entityUpdate2_1.put(EntityPojo.docUpdate_totalfrequency_, nCurrFreq - nFreqDecrement);
                BasicDBObject entityUpdate2 = new BasicDBObject(DbManager.set_, entityUpdate2_1);
                if (_diagnosticMode) {
                    if (_logInDiagnosticMode)
                        System.out.println("UPDATE DOC DATABASE: " + updateQuery2.toString() + "/"
                                + entityUpdate2.toString());
                } else {
                    DbManager.getDocument().getMetadata().update(updateQuery2, entityUpdate2, false, true);
                }
            }
        } //TESTED (including when to update logic above)
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Tidy up
    DbManager.getDB("doc_metadata").getCollection(outCollection).drop();
}
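Note the defensive pattern above: the code checks commObj instanceof ObjectId before calling getObjectId, because getObjectId casts blindly and would throw ClassCastException on a mismatched type. The same guard in isolation, reusing the example's entityId and field name:

// getObjectId casts blindly, so guard when the field's type isn't guaranteed:
Object raw = entityId.get("comm");
ObjectId commId = (raw instanceof ObjectId) ? (ObjectId) raw : null;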
From source file:com.ikanow.infinit.e.utility.MongoDocumentTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, boolean bAggregate, BasicDBObject chunk)
        throws IOException {
    PropertiesManager pm = new PropertiesManager();
    int nMaxContentSize_bytes = pm.getMaxContentSize();

    // Initialize the DB:

    DBCollection docsDB = DbManager.getDocument().getMetadata();
    DBCollection contentDB = DbManager.getDocument().getContent();
    DBCollection sourcesDB = DbManager.getIngest().getSource();

    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // 1. Get the documents from the DB (combining data + metadata and refreshing source meta)
    // (Ignore soft-deleted records:)

    if (null == query) {
        query = new BasicDBObject();
    }
    Object sourceKeyQueryTerm = query.remove(DocumentPojo.sourceKey_);
    if (null != sourceKeyQueryTerm) {
        if (query.toString()
                .contains(new StringBuffer("\"").append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException(
                    "Can't specify sourceKey as part of complex query term: " + query.toString());
        } //TESTED (by hand, "{ \"sourceKey\": \"x\", \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        if (sourceKeyQueryTerm instanceof String) {
            query.put(DocumentPojo.sourceKey_,
                    SourcePojo.getDistributedKeyQueryTerm((String) sourceKeyQueryTerm));
        } //TESTED (by hand, "{\"sourceKey\": \"feeds.arstechnica.com.arstechnica.index.11.2.\" }")
        else if (sourceKeyQueryTerm instanceof DBObject) {
            // find all the _sources_ matching this term, and convert to a big list including distribution
            BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
            fields.put(SourcePojo.highestDistributionFactorStored_, 1);
            DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.key_, sourceKeyQueryTerm), fields);
            LinkedList<String> sourceKeys = new LinkedList<String>();
            for (DBObject dbo : dbc) {
                String key = (String) dbo.get(SourcePojo.key_);
                Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_);
                Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key,
                        distributionFactor);
                sourceKeys.addAll(sourceKeysForSource);
            }
            query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));
        } //TESTED (by hand, "{\"sourceKey\": { \"$gt\": \"dev.ikanow\" } }")
        else {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //(actually not possible, just included here for mathematical completeness...)
    } else {
        if (query.toString()
                .contains(new StringBuffer("\"").append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //TESTED (by hand, "{ \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        // Optimize communityId into sourceKeys...
        if (null != query.get(DocumentPojo.communityId_)) {
            try {
                ObjectId commId = query.getObjectId(DocumentPojo.communityId_);
                BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
                fields.put(SourcePojo.highestDistributionFactorStored_, 1);
                DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.communityIds_, commId), fields);
                LinkedList<String> sourceKeys = new LinkedList<String>();
                int added = 0;
                for (DBObject dbo : dbc) {
                    String key = (String) dbo.get(SourcePojo.key_);
                    Integer distributionFactor = (Integer) dbo
                            .get(SourcePojo.highestDistributionFactorStored_);
                    Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key,
                            distributionFactor);
                    sourceKeys.addAll(sourceKeysForSource);
                    added += sourceKeysForSource.size();
                }
                query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));

                System.out.println("(Optimized simple community query to " + added + " source key(s))");
            } catch (Exception e) {
                //DEBUG
                //e.printStackTrace();

                System.out.println("(Can't optimize complex community query: " + e.getMessage() + ")");
            }
        } //TESTED (by hand - including distributed source version)
    }

    // Ignore deleted objects
    Object urlQuery = query.get(DocumentPojo.url_);
    if (null == urlQuery) {
        query.put(DocumentPojo.url_, Pattern.compile("^[^?]")); // (ie nothing starting with ?)
    } //TESTED
    else if (urlQuery instanceof BasicDBObject) {
        ((BasicDBObject) urlQuery).append("$regex", "^[^?]");
    } //TESTED

    //DEBUG
    //System.out.println("COMBINED QUERY= " + query.toString());

    // If aggregating, kick off the background aggregation thread
    if (bAggregate) {
        EntityBackgroundAggregationManager.startThread();
        AssociationBackgroundAggregationManager.startThread();
    }

    //Debug:
    DBCursor dbc = null;
    dbc = docsDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    byte[] storageArray = new byte[200000];

    int nSynced = 0;
    LinkedList<DocumentPojo> docsToTransfer = new LinkedList<DocumentPojo>();
    Map<ObjectId, LinkedList<DocumentPojo>> communityList = null;
    ObjectId currCommunityId = null;
    while (dbc.hasNext()) {
        BasicDBObject dbo = (BasicDBObject) dbc.next();
        DocumentPojo doc = DocumentPojo.fromDb(dbo, DocumentPojo.class);
        String sDocIndex = doc.getIndex();
        if (null == sDocIndex) {
            sDocIndex = "document_index";
        }
        if ((null != _deletedIndex) && !_deletedIndex.contains(sDocIndex)) {
            _deletedIndex.add(sDocIndex);
            rebuildIndex(sDocIndex);
            try { // (Just in case the index requires some time to sort itself out)
                Thread.sleep(1000);
            } catch (InterruptedException e) {
            }
        }

        //Debug:
        //System.out.println("Getting content..." + feed.getTitle() + " / " + feed.getUrl());

        // Get the content:
        if ((0 != nMaxContentSize_bytes)
                && StoreAndIndexManager.docHasExternalContent(doc.getUrl(), doc.getSourceUrl())) {
            BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, doc.getUrl());
            contentQ.put(CompressedFullTextPojo.sourceKey_,
                    new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, doc.getSourceKey())));
            BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1);
            fields.put(CompressedFullTextPojo.sourceKey_, 1);
            DBCursor dbcGzip = contentDB.find(contentQ, fields);
            while (dbcGzip.hasNext()) {
                BasicDBObject dboContent = (BasicDBObject) dbcGzip.next();
                if (!dboContent.containsField(CompressedFullTextPojo.sourceKey_)) {
                    // If this has another version then ignore this one...
                    if (dbcGzip.hasNext()) {
                        continue;
                    } //TESTED (by hand)
                }
                byte[] compressedData = ((byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_));
                ByteArrayInputStream in = new ByteArrayInputStream(compressedData);
                GZIPInputStream gzip = new GZIPInputStream(in);
                int nRead = 0;
                StringBuffer output = new StringBuffer();
                while (nRead >= 0) {
                    nRead = gzip.read(storageArray, 0, 200000);
                    if (nRead > 0) {
                        String s = new String(storageArray, 0, nRead, "UTF-8");
                        output.append(s);
                    }
                }
                doc.setFullText(output.toString());
            }
        }
        // (else document has full text already)

        // Get tags, if necessary:
        // Always overwrite tags - one of the reasons we might choose to migrate
        // Also may need source in order to support source index filtering
        SourcePojo src = _sourceCache.get(doc.getSourceKey());
        if (null == src) {
            //TODO (INF-2265): handle search index settings in pipeline mode... (also didn't seem to work?)
            BasicDBObject srcDbo = (BasicDBObject) sourcesDB
                    .findOne(new BasicDBObject(SourcePojo.key_, doc.getSourceKey()));
            if (null != srcDbo) {
                src = SourcePojo.fromDb(srcDbo, SourcePojo.class);

                if (null != src.getProcessingPipeline()) {
                    try {
                        // Set the index settings
                        HarvestController hc = new HarvestController();
                        HarvestControllerPipeline hcPipe = new HarvestControllerPipeline();
                        hcPipe.extractSource_preProcessingPipeline(src, hc);
                    } catch (Exception e) {
                        //DEBUG
                        e.printStackTrace();
                    }
                } //TESTED (by hand)

                _sourceCache.put(doc.getSourceKey(), src);
            }
        }
        doc.setTempSource(src); // (needed for source index filtering)
        if (null != src) {
            if (null != src.getTags()) {
                Set<String> tagsTidied = new TreeSet<String>();
                for (String s : src.getTags()) {
                    String ss = s.trim().toLowerCase();
                    tagsTidied.add(ss);
                }

                // May also want to write this back to the DB:
                //TODO (INF-2223): Handle append tags or not in the pipeline...
                if ((null == src.getAppendTagsToDocs()) || src.getAppendTagsToDocs()) {
                    if ((null == doc.getTags()) || (doc.getTags().size() < tagsTidied.size())) {
                        BasicDBObject updateQuery = new BasicDBObject(DocumentPojo.sourceKey_,
                                doc.getRawSourceKey()); // (ie including the # if there is one)
                        updateQuery.put(DocumentPojo._id_, doc.getId());
                        docsDB.update(updateQuery, new BasicDBObject(DbManager.addToSet_, new BasicDBObject(
                                DocumentPojo.tags_, new BasicDBObject(DbManager.each_, tagsTidied))));
                    }
                    doc.setTags(tagsTidied); // (just copy ptr across)
                }
            }
        }

        // 2. Update the index with the new document
        // (Optionally also update entity and assoc features)

        if (bAggregate) {
            if (null == currCommunityId) {
                currCommunityId = doc.getCommunityId();
            } else if (!currCommunityId.equals(doc.getCommunityId())) {
                LinkedList<DocumentPojo> perCommunityDocList = null;
                if (null == communityList) { // (very first time we see > 1 community)
                    communityList = new TreeMap<ObjectId, LinkedList<DocumentPojo>>();
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    perCommunityDocList.addAll(docsToTransfer);
                    //(NOT including doc, this hasn't been added to docsToTransfer yet)
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                currCommunityId = doc.getCommunityId();
                perCommunityDocList = communityList.get(currCommunityId);
                if (null == perCommunityDocList) {
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                perCommunityDocList.add(doc);
            }
        } //TESTED

        nSynced++;
        docsToTransfer.add(doc);
        if (0 == (nSynced % 10000)) {
            StoreAndIndexManager manager = new StoreAndIndexManager();

            if (bAggregate) {
                // Loop over communities and aggregate each one then store the modified entities/assocs
                doAggregation(communityList, docsToTransfer);
                communityList = null; // (in case the next 10,000 docs are all in the same community!)
                currCommunityId = null;
            } //TOTEST

            manager.addToSearch(docsToTransfer);
            docsToTransfer.clear();
            System.out.println("(Synced " + nSynced + " records)");
        }
    } // (End loop over docs)

    // Sync remaining docs

    if (!docsToTransfer.isEmpty()) {
        if (bAggregate) {
            // Loop over communities and aggregate each one then store the modified entities/assocs
            doAggregation(communityList, docsToTransfer);
        }

        StoreAndIndexManager manager = new StoreAndIndexManager();
        manager.addToSearch(docsToTransfer);
    }

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }

    if (bAggregate) {
        System.out.println("Completed. You can hit CTRL+C at any time.");
        System.out.println(
                "By default it will keep running for 5 minutes while the background aggregation runs to update the documents' entities.");
        try {
            Thread.sleep(300000);
        } catch (InterruptedException e) {
        }

        // Turn off so we can exit
        EntityBackgroundAggregationManager.stopThreadAndWait();
        AssociationBackgroundAggregationManager.stopThreadAndWait();
    }
}