List of usage examples for com.mongodb.DBObject.keySet()
Set<String> keySet();
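keySet() returns the top-level field names of a document; nested sub-documents are not flattened, so their inner keys have to be walked separately (several of the project examples below do exactly that). Before those examples, here is a minimal, self-contained sketch using the legacy com.mongodb driver; the server address and the "test"/"people" database and collection names are illustrative assumptions, not taken from any of the quoted sources.

import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import java.util.Set;

public class KeySetExample {
    public static void main(String[] args) {
        // Assumed local server plus a hypothetical "test.people" namespace
        MongoClient client = new MongoClient("localhost", 27017);
        try {
            DBCollection people = client.getDB("test").getCollection("people");
            DBObject doc = people.findOne();
            if (doc != null) {
                // keySet() yields the top-level field names of the retrieved document
                Set<String> fields = doc.keySet();
                for (String field : fields) {
                    System.out.println(field + " -> " + doc.get(field));
                }
            }
        } finally {
            client.close();
        }
    }
}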
From source file:com.hipstogram.storm.state.TrackCountMongoDBMapper.java
License:Apache License
@Override
public OpaqueValue getValue(DBObject dbObject) {
    DBObject histogram = (DBObject) dbObject.get(HISTOGRAM_NAME);
    Object[] keys = histogram.keySet().toArray();
    Integer l = 0;
    if (keys.length != 0)
        l = Integer.parseInt(histogram.get((String) keys[0]).toString());
    return (OpaqueValue) new OpaqueValue<Number>(0l, l);
}
From source file:com.ibm.db2j.MongoDB.java
License:Open Source License
/**
 * This method analyses the result document from Mongo and determines a suitable
 * initial Logical Table definition from it.
 *
 * @param resultDoc - the mongo document that has been retrieved from the collection.
 * @return a String representing a Logical Table definition appropriate to the resultDoc.
 */
private static String generateLTDefFromMongoDocument(DBObject resultDoc) {
    StringBuilder ltDef = new StringBuilder();
    for (String fieldName : resultDoc.keySet()) {
        // The field gives us the column name
        Object mongoField = resultDoc.get(fieldName);
        if (mongoField != null) {
            if (mongoField instanceof java.lang.String) {
                ltDef = ltDef.append(fieldName).append(" VARCHAR(255), ");
            } else if (mongoField instanceof java.lang.Integer) {
                ltDef = ltDef.append(fieldName).append(" INTEGER, ");
            } else if (mongoField instanceof java.lang.Double) {
                ltDef = ltDef.append(fieldName).append(" DOUBLE, ");
            } else if (mongoField instanceof java.lang.Boolean) {
                ltDef = ltDef.append(fieldName).append(" BOOLEAN, ");
            } else if (mongoField instanceof java.util.Date) {
                ltDef = ltDef.append(fieldName).append(" DATE, ");
            } else if (mongoField instanceof org.bson.types.BSONTimestamp) {
                ltDef = ltDef.append(fieldName).append(" TIMESTAMP, ");
            } else if (mongoField instanceof org.bson.types.ObjectId
                    || mongoField instanceof com.mongodb.BasicDBObject
                    || mongoField instanceof com.mongodb.BasicDBList) {
                ltDef = ltDef.append(fieldName).append(" VARCHAR(255), ");
            }
        }
    }
    // remove a trailing ", "
    int defLength = ltDef.length();
    if (defLength > 2) {
        ltDef = ltDef.delete(defLength - 2, defLength - 1);
    }
    return ltDef.toString();
}
From source file:com.ikanow.infinit.e.harvest.HarvestControllerPipeline.java
License:Open Source License
private void splitDocuments(DocumentPojo doc, SourcePojo source, SourcePipelinePojo splitter,
        List<DocumentPojo> docs) {
    try {
        if (null == source.getRssConfig()) {
            source.setRssConfig(new SourceRssConfigPojo());
        }
        if (null != source.getRssConfig().getExtraUrls()) { // refreshed ready for new document
            source.getRssConfig().setExtraUrls(null);
        }
        HashMap<String, Object> jsonLookup = new HashMap<String, Object>();
        if ((null != splitter.splitter.getScriptlang())
                && splitter.splitter.getScriptlang().startsWith("automatic")) {
            // (automatic or automatic_json or automatic_xml)
            String[] args = splitter.splitter.getScript().split("\\s*,\\s*");
            Object[] objList = null;
            String field = args[0];
            if (field.startsWith(DocumentPojo.fullText_)) {
                // fullText, or fullText.[x] where [x] is the root value
                DocumentPojo dummyDoc = new DocumentPojo();
                dummyDoc.setFullText(doc.getFullText());
                MetadataSpecPojo dummyContent = new MetadataSpecPojo();
                dummyContent.fieldName = "extract";
                dummyContent.scriptlang = "stream";
                dummyContent.flags = "o";
                if (field.equals(DocumentPojo.fullText_)) { // fullText
                    dummyContent.script = "";
                } else {
                    dummyContent.script = field.substring(1 + DocumentPojo.fullText_.length()); //+1 for the "."
                }
                _uah.processMetadataChain(dummyDoc, Arrays.asList(dummyContent), source.getRssConfig(), null);
                BasicDBObject dummyDocDbo = (BasicDBObject) dummyDoc.toDb();
                dummyDocDbo = (BasicDBObject) dummyDocDbo.get(DocumentPojo.metadata_);
                if (null != dummyDocDbo) {
                    objList = ((Collection<?>) (dummyDocDbo.get("extract"))).toArray(); // (returns a list of strings)
                }
            } //TESTED (doc_splitter_test_auto_json, json: test3, xml: test4)
            else if (field.startsWith(DocumentPojo.metadata_)) { // field starts with "metadata."
                objList = doc.getMetadata().get(field.substring(1 + DocumentPojo.metadata_.length())); //+1 for the "."
            } //TESTED (doc_splitter_test_auto_json, test1)
            else { // direct reference to metadata field
                objList = doc.getMetadata().get(field);
            } //TESTED (doc_splitter_test_auto_json, test2)
            if ((null != objList) && (objList.length > 0)) {
                source.getRssConfig().setExtraUrls(new ArrayList<ExtraUrlPojo>(objList.length));
                int num = 0;
                for (Object o : objList) {
                    num++;
                    ExtraUrlPojo url = new ExtraUrlPojo();
                    if ((1 == args.length) || !(o instanceof DBObject)) { // generate default URL
                        url.url = doc.getUrl() + "#" + num;
                    } //TESTED (doc_splitter_test_auto_json, test1)
                    else if (2 == args.length) { // url specified in the format <fieldname-in-dot-notation>
                        url.url = MongoDbUtil.getProperty((DBObject) o, args[1]);
                    } //TESTED (doc_splitter_test_auto_json, test2)
                    else { // url specified in format <message-format-with-{1}-{2}-etc>,<fieldname-in-dot-notation-for-1>,..
                        ArrayList<Object> cmdArgs = new ArrayList<Object>(args.length - 1); //-2 + 1 (+1 - see below)
                        cmdArgs.add("[INDEX_FROM_1_NOT_0]");
                        for (int j = 2; j < args.length; ++j) {
                            cmdArgs.add(MongoDbUtil.getProperty((DBObject) o, args[j]));
                        }
                        url.url = MessageFormat.format(args[1], cmdArgs.toArray());
                    } //TESTED (doc_splitter_test_auto_json, test3, test4)
                    if (null == url.url) { // (if we can't extract a URL then bail out)
                        continue;
                    }
                    url.title = new StringBuffer(doc.getTitle()).append(" (").append(num).append(")").toString();
                    url.fullText = o.toString();
                    source.getRssConfig().getExtraUrls().add(url);
                    if (splitter.splitter.getScriptlang().startsWith("automatic_")) { // automatic_json or automatic_xml
                        jsonLookup.put(url.url, o);
                    }
                }
            } //TESTED (doc_splitter_test_auto_json)
        } else { // normal case - run the 'follow web links' code to get the docs
            source.getRssConfig().setSearchConfig(splitter.splitter);
            FeedHarvester_searchEngineSubsystem subsys = new FeedHarvester_searchEngineSubsystem();
            subsys.generateFeedFromSearch(source, _hc, doc);
        }
        if (null != source.getRssConfig().getExtraUrls()) {
            for (ExtraUrlPojo newDocInfo : source.getRssConfig().getExtraUrls()) {
                if (null == doc.getSourceUrl()) {
                    // (if sourceUrl != null, bypass it's because it's been generated by a file so is being deleted anyway)
                    //(note: this null check above is relied upon by the federated query engine, so don't go randomly changing it!)
                    if (_hc.getDuplicateManager().isDuplicate_Url(newDocInfo.url, source, null)) {
                        //TODO: should handle updateCycle_secs?
                        continue;
                    }
                }
                DocumentPojo newDoc = new DocumentPojo();
                newDoc.setCreated(doc.getCreated());
                newDoc.setModified(doc.getModified());
                newDoc.setUrl(newDocInfo.url);
                newDoc.setTitle(newDocInfo.title);
                newDoc.setDescription(newDocInfo.description);
                newDoc.setFullText(newDocInfo.fullText);
                // For JSON, also create the metadata
                if (null != splitter.splitter.getScriptlang()) {
                    if (splitter.splitter.getScriptlang().equals("automatic_json")) {
                        newDoc.addToMetadata("json", jsonLookup.get(newDoc.getUrl()));
                    } else if (splitter.splitter.getScriptlang().equals("automatic_xml")) {
                        Object obj = jsonLookup.get(newDoc.getUrl());
                        if (obj instanceof DBObject) {
                            DBObject dbo = (DBObject) obj;
                            for (String key : dbo.keySet()) {
                                Object objArray = dbo.get(key);
                                if (objArray instanceof Object[]) {
                                    newDoc.addToMetadata(key, (Object[]) objArray);
                                } else if (objArray instanceof Collection<?>) {
                                    newDoc.addToMetadata(key, ((Collection<?>) objArray).toArray());
                                }
                            }
                        } //(test4)
                    }
                } //TESTED (doc_splitter_test_auto_json, test1:json, test4:xml)
                // Published date is a bit more complex
                if (null != newDocInfo.publishedDate) {
                    try {
                        newDoc.setPublishedDate(new Date(DateUtility.parseDate(newDocInfo.publishedDate)));
                    } catch (Exception e) {
                    }
                } //TESTED (test3,test4)
                if (null == newDoc.getPublishedDate()) {
                    newDoc.setPublishedDate(doc.getPublishedDate());
                } //TESTED (test1)
                if (null == newDoc.getPublishedDate()) {
                    newDoc.setPublishedDate(doc.getCreated());
                } //TESTED (test2)
                newDoc.setTempSource(source);
                newDoc.setSource(doc.getSource());
                newDoc.setMediaType(doc.getMediaType());
                newDoc.setSourceKey(doc.getSourceKey());
                newDoc.setSourceUrl(doc.getSourceUrl()); // (otherwise won't be able to delete child docs that come from a file)
                newDoc.setCommunityId(doc.getCommunityId());
                newDoc.setDocGeo(doc.getDocGeo());
                newDoc.setIndex(doc.getIndex());
                newDoc.setSpawnedFrom(splitter);
                docs.add(newDoc);
            } //end loop over URLs
        } //TESTED
    } catch (Exception e) {
        StringBuffer errMessage = HarvestExceptionUtils.createExceptionMessage(e);
        _hc.getHarvestStatus().logMessage(errMessage.toString(), true);
    } //TESTED (test4)
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java
License:Open Source License
/**
 * Add events to the elastic search index for events
 * and the mongodb collection
 * so they are searchable for searchsuggest
 *
 * Step 1.a, try to just update alias's
 * Step 1.b, if fail, create new entry
 *
 * Step 2, Update totalfreq and doccount
 *
 * Step 3, After updating totalfreq and doccount, write to ES for every group
 *
 * @param events
 */
public static void updateEventFeatures(Map<String, Map<ObjectId, AssociationFeaturePojo>> eventFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewAssocs = 0;
    long entityAggregationTime = new Date().getTime();
    DBCollection col = DbManager.getFeature().getAssociation();
    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;
    String savedSyncTime = null;
    for (Map<ObjectId, AssociationFeaturePojo> evtCommunity : eventFeatures.values()) {
        Iterator<Map.Entry<ObjectId, AssociationFeaturePojo>> it = evtCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, AssociationFeaturePojo> evtFeatureKV = it.next();
            try {
                AssociationFeaturePojo evtFeature = evtFeatureKV.getValue();
                long nSavedDocCount = evtFeature.getDoccount();
                ObjectId communityID = evtFeature.getCommunityId();
                if ((null == currCache) || !currCache.getCommunityId().equals(evtFeatureKV.getKey())) {
                    currCache = CommunityFeatureCaches.getCommunityFeatureCache(evtFeatureKV.getKey());
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println(
                                    "AssociationAggregationUtils.updateEventFeatures, Opened cache for community: "
                                            + evtFeatureKV.getKey());
                    }
                } //TESTED (by hand)
                // Is this in our cache? If so can short cut a bunch of the DB interaction:
                AssociationFeaturePojo cachedAssoc = currCache.getCachedAssocFeature(evtFeature);
                if (null != cachedAssoc) {
                    if (_incrementalMode) {
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("AssociationAggregationUtils.updateEventFeatures, skip cached: "
                                        + cachedAssoc.toDb());
                            //TODO (INF-2825): should be continue-ing here so can use delta more efficiently...
                        }
                    } else if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, grabbed cached: "
                                    + cachedAssoc.toDb());
                    }
                    numCacheHits++;
                } //TESTED (by hand)
                else {
                    numCacheMisses++;
                }
                //try to update
                BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, evtFeature.getIndex());
                query.put(AssociationFeaturePojo.communityId_, communityID);
                //Step1 try to update alias
                //update arrays
                BasicDBObject multiopAliasArrays = new BasicDBObject();
                // Entity1 Alias:
                if (null != evtFeature.getEntity1_index()) {
                    evtFeature.addEntity1(evtFeature.getEntity1_index());
                }
                if (null != evtFeature.getEntity1()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity1())
                            || !cachedAssoc.getEntity1().containsAll(evtFeature.getEntity1())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getEntity1());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity1_, multiopE);
                    }
                } //TESTED (by hand)
                // Entity2 Alias:
                if (null != evtFeature.getEntity2_index()) {
                    evtFeature.addEntity2(evtFeature.getEntity2_index());
                }
                if (null != evtFeature.getEntity2()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity2())
                            || !cachedAssoc.getEntity2().containsAll(evtFeature.getEntity2())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getEntity2());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity2_, multiopE);
                    }
                } //TESTED (by hand)
                // verb/verb cat alias:
                if (null != evtFeature.getVerb_category()) {
                    evtFeature.addVerb(evtFeature.getVerb_category());
                }
                if (null != evtFeature.getVerb()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getVerb())
                            || !cachedAssoc.getVerb().containsAll(evtFeature.getVerb())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getVerb());
                        multiopAliasArrays.put(AssociationFeaturePojo.verb_, multiopE);
                    }
                } //TESTED (by hand)
                // OK - now we can copy across the fields into the cache:
                if (null != cachedAssoc) {
                    currCache.updateCachedAssocFeatureStatistics(cachedAssoc, evtFeature); //(evtFeature is now fully up to date)
                } //TESTED (by hand)
                BasicDBObject updateOp = new BasicDBObject();
                if (!multiopAliasArrays.isEmpty()) {
                    updateOp.put(MongoDbManager.addToSet_, multiopAliasArrays);
                }
                // Document count for this event
                BasicDBObject updateFreqDocCount = new BasicDBObject(AssociationFeaturePojo.doccount_, nSavedDocCount);
                updateOp.put(MongoDbManager.inc_, updateFreqDocCount);
                BasicDBObject fields = new BasicDBObject(AssociationFeaturePojo.doccount_, 1);
                fields.put(AssociationFeaturePojo.entity1_, 1);
                fields.put(AssociationFeaturePojo.entity2_, 1);
                fields.put(AssociationFeaturePojo.verb_, 1);
                //(slightly annoying, since only want these if updating dc but won't know
                // until after i've got this object)
                fields.put(AssociationFeaturePojo.db_sync_time_, 1);
                fields.put(AssociationFeaturePojo.db_sync_doccount_, 1);
                DBObject dboUpdate = null;
                if (_diagnosticMode) {
                    if (null == cachedAssoc) {
                        dboUpdate = col.findOne(query, fields);
                    }
                } else {
                    if (null != cachedAssoc) {
                        col.update(query, updateOp, false, false);
                    } else { // Not cached - so have to grab the feature we're either getting or creating
                        dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp, false, true);
                        // (can use findAndModify because specify index, ie the shard key)
                        // (returns event before the changes above, update the feature object below)
                        // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                    }
                }
                if ((null != cachedAssoc) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                {
                    AssociationFeaturePojo egp = cachedAssoc;
                    if (null == egp) {
                        egp = AssociationFeaturePojo.fromDb(dboUpdate, AssociationFeaturePojo.class);
                        evtFeature.setDoccount(egp.getDoccount() + nSavedDocCount);
                        evtFeature.setDb_sync_doccount(egp.getDb_sync_doccount());
                        evtFeature.setDb_sync_time(egp.getDb_sync_time());
                        if (null != egp.getEntity1()) {
                            for (String ent : egp.getEntity1())
                                evtFeature.addEntity1(ent);
                        }
                        if (null != egp.getEntity2()) {
                            for (String ent : egp.getEntity2())
                                evtFeature.addEntity2(ent);
                        }
                        if (null != egp.getVerb()) {
                            for (String verb : egp.getVerb())
                                evtFeature.addVerb(verb);
                        }
                    } //TESTED (cached and non-cached cases)
                    // (in the cached case, evtFeature has already been updated by updateCachedAssocFeatureStatistics)
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, found: "
                                    + ((BasicDBObject) egp.toDb()).toString());
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, ^^^ found from query: "
                                    + query.toString() + " / " + updateOp.toString());
                    }
                    // (In background aggregation mode we update db_sync_prio when checking the -otherwise unused, unlike entities- document update schedule)
                } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                {
                    numNewAssocs++;
                    // Synchronization settings for the newly created object
                    evtFeature.setDb_sync_doccount(nSavedDocCount);
                    if (null == savedSyncTime) {
                        savedSyncTime = Long.toString(System.currentTimeMillis());
                    }
                    evtFeature.setDb_sync_time(savedSyncTime);
                    // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is
                    // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are
                    // only set here, and the findAndModify is atomic
                    BasicDBObject baseFields = new BasicDBObject();
                    if (null != evtFeature.getEntity1_index()) {
                        baseFields.put(AssociationFeaturePojo.entity1_index_, evtFeature.getEntity1_index());
                    }
                    if (null != evtFeature.getEntity2_index()) {
                        baseFields.put(AssociationFeaturePojo.entity2_index_, evtFeature.getEntity2_index());
                    }
                    if (null != evtFeature.getVerb_category()) {
                        baseFields.put(AssociationFeaturePojo.verb_category_, evtFeature.getVerb_category());
                    }
                    baseFields.put(AssociationFeaturePojo.assoc_type_, evtFeature.getAssociation_type());
                    baseFields.put(AssociationFeaturePojo.db_sync_doccount_, evtFeature.getDb_sync_doccount());
                    baseFields.put(AssociationFeaturePojo.db_sync_time_, evtFeature.getDb_sync_time());
                    baseFields.put(AssociationFeaturePojo.db_sync_prio_, 1000.0); // (ensures new objects are quickly index-synchronized)
                    if (!_diagnosticMode) { // Store the object
                        col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                    } else {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, not found: "
                                    + query.toString() + " / " + baseFields.toString() + "/ orig_update= "
                                    + updateOp.toString());
                    }
                    // (Note even in background aggregation mode we still perform the feature synchronization
                    // for new entities - and it has to be right at the end because it "corrupts" the objects)
                } //(end if first time seen)
                if (null == cachedAssoc) { // First time we've seen this locally, so add to cache
                    currCache.addCachedAssocFeature(evtFeature);
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, added to cache: "
                                    + evtFeature.toDb());
                    }
                } //TESTED (by hand)
            } catch (Exception e) {
                // Exception, remove from feature list
                it.remove();
                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }
        } // (end loop over all communities for the set of features sharing and index)
    } // (end loop over indexes)
    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some assocs were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getAssocCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" assoc_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_assocs=").append(eventFeatures.size()).append(" new_assocs=")
                .append(numNewAssocs).append(" cache_misses=").append(numCacheMisses).append(" cache_hits=")
                .append(numCacheHits).append(" cache_size=").append(cacheSize);
        logger.info(logMsg.toString());
    }
}
From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java
License:Open Source License
/**
 * Updates the feature entries for the list of entities
 * that was just extracted including changing frequency,
 * adding aliases etc
 *
 * This method now has 3 steps:
 * 1. Try to update alias
 * 1.a If fail, create new gaz
 * 2. Update totalfreq and doccount
 *
 * @param ents List of entities to update in the entity feature
 */
public static void updateEntityFeatures(Map<String, Map<ObjectId, EntityFeaturePojo>> entFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewEntities = 0;
    long entityAggregationTime = new Date().getTime();
    DBCollection col = DbManager.getFeature().getEntity();
    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;
    String savedSyncTime = null;
    for (Map<ObjectId, EntityFeaturePojo> entCommunity : entFeatures.values()) {
        Iterator<Map.Entry<ObjectId, EntityFeaturePojo>> it = entCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, EntityFeaturePojo> entFeatureKV = it.next();
            try {
                EntityFeaturePojo entFeature = entFeatureKV.getValue();
                long nSavedDocCount = entFeature.getDoccount();
                long nSavedFreqCount = entFeature.getTotalfreq();
                // (these should be constant across all communities but keep it here
                // so can assign it using entFeature, it's v cheap so no need to get once like for sync vars)
                // For each community, see if the entity feature already exists *for that community*
                ObjectId communityID = entFeature.getCommunityId();
                if (null != communityID) {
                    if ((null == currCache) || !currCache.getCommunityId().equals(entFeatureKV.getKey())) {
                        currCache = CommunityFeatureCaches.getCommunityFeatureCache(entFeatureKV.getKey());
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, Opened cache for community: "
                                                + entFeatureKV.getKey());
                        }
                    } //TESTED (by hand)
                    // Is this in our cache? If so can short cut a bunch of the DB interaction:
                    EntityFeaturePojo cachedEnt = currCache.getCachedEntityFeature(entFeature);
                    if (null != cachedEnt) {
                        if (_incrementalMode) {
                            if (_diagnosticMode) {
                                if (_logInDiagnosticMode)
                                    System.out.println("EntityAggregationUtils.updateEntityFeatures, skip cached: "
                                            + cachedEnt.toDb());
                                //TODO (INF-2825): should be continue-ing here (after implementing incremental caching fully) so can use delta more efficiently...
                            }
                        } else if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, grabbed cached: "
                                        + cachedEnt.toDb());
                        }
                        numCacheHits++;
                    } //TESTED (by hand)
                    else {
                        numCacheMisses++;
                    }
                    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entFeature.getIndex());
                    query.put(EntityFeaturePojo.communityId_, communityID);
                    BasicDBObject updateOp = new BasicDBObject();
                    // Add aliases:
                    BasicDBObject updateOpA = new BasicDBObject();
                    if (null != entFeature.getAlias()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getAlias())
                                || !cachedEnt.getAlias().containsAll(entFeature.getAlias())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, entFeature.getAlias());
                            updateOpA.put(EntityFeaturePojo.alias_, multiopE);
                        } //TESTED (by hand)
                    }
                    // Add link data, if there is any:
                    if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getSemanticLinks())
                                || !cachedEnt.getSemanticLinks().containsAll(entFeature.getSemanticLinks())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopF = new BasicDBObject(MongoDbManager.each_, entFeature.getSemanticLinks());
                            updateOpA.put(EntityFeaturePojo.linkdata_, multiopF);
                        } //TESTED (by hand)
                    }
                    // OK - now we can copy across the fields into the cache:
                    if (null != cachedEnt) {
                        currCache.updateCachedEntityFeatureStatistics(cachedEnt, entFeature); //(entFeature is now fully up to date)
                    } //TESTED (by hand)
                    if (!updateOpA.isEmpty()) {
                        updateOp.put(MongoDbManager.addToSet_, updateOpA);
                    }
                    // Update frequency:
                    BasicDBObject updateOpB = new BasicDBObject();
                    updateOpB.put(EntityFeaturePojo.totalfreq_, nSavedFreqCount);
                    updateOpB.put(EntityFeaturePojo.doccount_, nSavedDocCount);
                    updateOp.put(MongoDbManager.inc_, updateOpB);
                    //try to use find/modify to see if something comes back and set doc freq/totalfreq
                    BasicDBObject fields = new BasicDBObject(EntityFeaturePojo.totalfreq_, 1);
                    fields.put(EntityFeaturePojo.doccount_, 1);
                    fields.put(EntityFeaturePojo.alias_, 1);
                    fields.put(EntityFeaturePojo.linkdata_, 1);
                    //(slightly annoying, since only want these 2 largish fields if updating freq but won't know
                    // until after i've got this object)
                    fields.put(EntityFeaturePojo.db_sync_time_, 1);
                    fields.put(EntityFeaturePojo.db_sync_doccount_, 1);
                    DBObject dboUpdate = null;
                    if (_diagnosticMode) {
                        if (null == cachedEnt) {
                            dboUpdate = col.findOne(query, fields);
                        }
                    } else {
                        if (null != cachedEnt) {
                            col.update(query, updateOp, false, false);
                        } else { // Not cached - so have to grab the feature we're either getting or creating
                            dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp, false, true);
                            // (can use findAndModify because specify index, ie the shard key)
                            // (returns entity before the changes above, update the feature object below)
                            // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                        }
                    }
                    if ((null != cachedEnt) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                    {
                        EntityFeaturePojo gp = cachedEnt;
                        // (Update the entity feature to be correct so that it can be accurately synchronized with the index)
                        if (null == gp) {
                            gp = EntityFeaturePojo.fromDb(dboUpdate, EntityFeaturePojo.class);
                            entFeature.setTotalfreq(gp.getTotalfreq() + nSavedFreqCount);
                            entFeature.setDoccount(gp.getDoccount() + nSavedDocCount);
                            entFeature.setDbSyncDoccount(gp.getDbSyncDoccount());
                            entFeature.setDbSyncTime(gp.getDbSyncTime());
                            if (null != gp.getAlias()) {
                                entFeature.addAllAlias(gp.getAlias());
                            }
                            if (null != gp.getSemanticLinks()) {
                                entFeature.addToSemanticLinks(gp.getSemanticLinks());
                            }
                        } //TESTED (cached case and non-cached case)
                        // (in the cached case, entFeature has already been updated by updateCachedEntityFeatureStatistics)
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, found: "
                                        + ((BasicDBObject) gp.toDb()).toString());
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, ^^^ found from query: "
                                        + query.toString() + " / " + updateOp.toString());
                        }
                        // (In background aggregation mode we update db_sync_prio when checking the doc update schedule)
                    } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                    {
                        numNewEntities++;
                        // Synchronization settings for the newly created object
                        if (null == savedSyncTime) {
                            savedSyncTime = Long.toString(System.currentTimeMillis());
                        }
                        entFeature.setDbSyncDoccount(nSavedDocCount);
                        entFeature.setDbSyncTime(savedSyncTime);
                        // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is
                        // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are
                        // only set here, and the findAndModify is atomic
                        // (Do in raw MongoDB for performance)
                        BasicDBObject baseFields = new BasicDBObject();
                        baseFields.put(EntityFeaturePojo.dimension_, entFeature.getDimension().toString());
                        baseFields.put(EntityFeaturePojo.type_, entFeature.getType());
                        baseFields.put(EntityFeaturePojo.disambiguated_name_, entFeature.getDisambiguatedName());
                        baseFields.put(EntityFeaturePojo.db_sync_doccount_, entFeature.getDbSyncDoccount());
                        baseFields.put(EntityFeaturePojo.db_sync_prio_, 1000.0);
                        baseFields.put(EntityFeaturePojo.db_sync_time_, entFeature.getDbSyncTime());
                        if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                            baseFields.put(EntityFeaturePojo.linkdata_, entFeature.getSemanticLinks());
                        }
                        //attempt to add geotag (makes necessary checks on util side)
                        //also add ontology type if geotag is found
                        EntityGeotagAggregationUtils.addEntityGeo(entFeature);
                        if (entFeature.getGeotag() != null) {
                            BasicDBObject geo = new BasicDBObject(GeoPojo.lat_, entFeature.getGeotag().lat);
                            geo.put(GeoPojo.lon_, entFeature.getGeotag().lon);
                            baseFields.put(EntityFeaturePojo.geotag_, geo);
                            if (entFeature.getOntology_type() != null) {
                                baseFields.put(EntityFeaturePojo.ontology_type_, entFeature.getOntology_type());
                            }
                        }
                        if (!_diagnosticMode) { // Store the object
                            col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                        } else {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, not found: "
                                        + query.toString() + ": " + baseFields.toString());
                        }
                    } //(end first time this feature seen - globally)
                    if (null == cachedEnt) { // First time we've seen this locally, so add to cache
                        currCache.addCachedEntityFeature(entFeature);
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, added to cache: "
                                        + entFeature.toDb());
                        }
                    } //TESTED (by hand)
                } //(end if community id assigned)
            } catch (Exception e) {
                // Exception, remove from feature list
                it.remove();
                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }
        } // (end loop over communities)
    } // (end loop over indexes)
    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some ents were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getEntityCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" ent_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_ents=").append(entFeatures.size()).append(" new_ents=").append(numNewEntities)
                .append(" cache_misses=").append(numCacheMisses).append(" cache_hits=").append(numCacheHits)
                .append(" cache_size=").append(cacheSize);
        logger.info(logMsg.toString());
    }
}
From source file:com.imaginea.mongodb.requestdispatchers.DocumentRequestDispatcher.java
License:Apache License
/**
 * Gets the keys within a nested document and adds it to the complete Set.
 * Used by getKeysRequest function above.
 *
 * @param doc
 *            document
 * @param completeSet
 *            collection of all keys
 * @param prefix
 *            For nested docs. For the key <foo.bar.baz>, the prefix would
 *            be <foo.bar>
 */
private void getNestedKeys(DBObject doc, Set<String> completeSet, String prefix) {
    Set<String> allKeys = doc.keySet();
    Iterator<String> it = allKeys.iterator();
    while (it.hasNext()) {
        String temp = it.next();
        completeSet.add(prefix + temp);
        if (doc.get(temp) instanceof BasicDBObject) {
            getNestedKeys((DBObject) doc.get(temp), completeSet, prefix + temp + ".");
        }
    }
}
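To make the dotted-prefix recursion above concrete: for a document {foo: {bar: {baz: 42}}} the complete set ends up as [foo, foo.bar, foo.bar.baz]. The sketch below is a standalone re-implementation of the same idea for illustration only; it is not the DocumentRequestDispatcher code itself, and the sample field names are hypothetical.

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import java.util.Set;
import java.util.TreeSet;

public class NestedKeysSketch {
    // Same pattern as above: recurse into sub-documents, prefixing each key with "parent."
    static void collectKeys(DBObject doc, Set<String> completeSet, String prefix) {
        for (String key : doc.keySet()) {
            completeSet.add(prefix + key);
            Object value = doc.get(key);
            if (value instanceof BasicDBObject) {
                collectKeys((DBObject) value, completeSet, prefix + key + ".");
            }
        }
    }

    public static void main(String[] args) {
        DBObject doc = new BasicDBObject("foo",
                new BasicDBObject("bar", new BasicDBObject("baz", 42)));
        Set<String> keys = new TreeSet<String>();
        collectKeys(doc, keys, "");
        System.out.println(keys); // [foo, foo.bar, foo.bar.baz]
    }
}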
From source file:com.imaginea.mongodb.services.DocumentServiceImpl.java
License:Apache License
/**
 * Gets the list of documents inside a collection in a database in mongo to
 * which user is connected to.
 *
 * @param dbName
 *            Name of Database
 * @param collectionName
 *            Name of Collection from which to get all Documents
 * @param query
 *            query to be performed. In case of empty query {} return all
 *            docs.
 * @param keys
 *            Keys to be present in the resulted docs.
 * @param limit
 *            Number of docs to show.
 * @param skip
 *            Docs to skip from the front.
 * @return List of all documents.
 * @exception EmptyDatabaseNameException
 *                If database name is null
 * @exception EmptyCollectionNameException
 *                If Collection name is null
 * @exception UndefinedDatabaseException
 *                If database is not present
 * @exception UndefinedCollectionException
 *                If Collection is not present
 * @exception DatabaseException
 *                throw super type of UndefinedDatabaseException
 * @exception ValidationException
 *                throw super type of
 *                EmptyDatabaseNameException,EmptyCollectionNameException
 * @exception CollectionException
 *                throw super type of UndefinedCollectionException
 * @exception DocumentException
 *                exception while performing get doc list
 */
public ArrayList<DBObject> getQueriedDocsList(String dbName, String collectionName, DBObject query,
        DBObject keys, int limit, int skip)
        throws DatabaseException, CollectionException, DocumentException, ValidationException {
    mongoInstance = mongoInstanceProvider.getMongoInstance();
    if (dbName == null) {
        throw new EmptyDatabaseNameException("Database name is null");
    }
    if (dbName.equals("")) {
        throw new EmptyDatabaseNameException("Database Name Empty");
    }
    if (collectionName == null) {
        throw new EmptyCollectionNameException("Collection name is null");
    }
    if (collectionName.equals("")) {
        throw new EmptyCollectionNameException("Collection Name Empty");
    }
    ArrayList<DBObject> dataList = new ArrayList<DBObject>();
    try {
        if (!mongoInstance.getDatabaseNames().contains(dbName)) {
            throw new UndefinedDatabaseException("DB with name [" + dbName + "]DOES_NOT_EXIST");
        }
        if (!mongoInstance.getDB(dbName).getCollectionNames().contains(collectionName)) {
            throw new UndefinedCollectionException(
                    "Collection with name [" + collectionName + "] DOES NOT EXIST in Database [" + dbName + "]");
        }
        if (keys.keySet().isEmpty()) {
            keys.put("_id", 1); // For empty keys return all _id of all docs
        }
        // Return Queried Documents
        DBCursor cursor = mongoInstance.getDB(dbName).getCollection(collectionName).find(query, keys);
        cursor.limit(limit);
        cursor.skip(skip);
        if (cursor.hasNext()) {
            while (cursor.hasNext()) {
                dataList.add(cursor.next());
            }
        }
    } catch (MongoException e) {
        throw new DocumentException(ErrorCodes.GET_DOCUMENT_LIST_EXCEPTION, "GET_DOCUMENT_LIST_EXCEPTION",
                e.getCause());
    }
    return dataList;
}
From source file:com.indeed.iupload.core.authentification.FileBasedUserPermissionProvider.java
License:Apache License
protected void setUp(DBObject configRoot) {
    this.rootUserNames = getStringSetField(configRoot, "rootUserNames");
    this.anyoneWritableRepositories = getStringSetField(configRoot, "anyoneWritableRepositories");
    this.indexwiseAllowedUsers = new HashMap<String, Map<String, Set<String>>>();
    final DBObject repositoryIndexMap = (DBObject) configRoot.get("indexwiseAllowedUsers");
    for (String repositoryName : repositoryIndexMap.keySet()) {
        final DBObject indexUsersMap = (DBObject) repositoryIndexMap.get(repositoryName);
        final Map<String, Set<String>> map = new HashMap<String, Set<String>>();
        for (String indexName : indexUsersMap.keySet()) {
            map.put(indexName, getStringSetField(indexUsersMap, indexName));
        }
        this.indexwiseAllowedUsers.put(repositoryName, map);
    }
}
From source file:com.jaspersoft.mongodb.importer.MongoDbImporter.java
License:Open Source License
public void validate(String tableName) {
    DBCollection collection = mongodbConnection.getMongoDatabase().getCollection(tableName);
    long size = collection.getCount();
    if (size == 0) {
        logger.error("No data in Mongo database");
        return;
    }
    logger.info("Elements in collection: " + size);
    logger.info("Validating the first 5 entries");
    DBCursor cursor = collection.find().limit(5);
    DBObject object;
    Object value;
    logger.info("---------------");
    while (cursor.hasNext()) {
        object = cursor.next();
        for (String id : object.keySet()) {
            value = object.get(id);
            logger.info(value + " -> " + value.getClass().getName());
        }
        logger.info("---------------");
    }
}
From source file:com.jaspersoft.mongodb.MongoDbFieldsProvider.java
License:Open Source License
private void processDBObject(String parentFieldName, DBObject currentDbObject, Map<String, Class<?>> fieldNames) {
    Object value;
    if (logger.isDebugEnabled()) {
        logger.debug("processDBObject parentFieldName: " + parentFieldName);
        logger.debug("processDBObject currentDbObject: " + currentDbObject.toString());
    }
    for (String fieldName : currentDbObject.keySet()) {
        value = currentDbObject.get(fieldName);
        if (value == null) {
            continue;
        }
        if (value instanceof BasicDBList) {
            fieldNames.put((parentFieldName == null ? "" : parentFieldName + ".") + fieldName, List.class);
        } else if (value instanceof BasicDBObject) {
            processDBObject((parentFieldName == null ? "" : parentFieldName + ".") + fieldName,
                    (DBObject) value, fieldNames);
        } else {
            fieldNames.put((parentFieldName == null ? "" : parentFieldName + ".") + fieldName, value.getClass());
        }
    }
}
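As a rough usage illustration of the field-type discovery above (this is not part of MongoDbFieldsProvider itself): a document such as {name: "Ann", address: {city: "Oslo"}, tags: [...]} would yield entries like name -> String, address.city -> String and tags -> List. The sketch below mimics that mapping on a hand-built document; all field names are hypothetical.

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FieldTypeSketch {
    // Same idea as processDBObject above: lists map to List.class, sub-documents recurse
    // with a "parent.child" name, everything else maps to its own class.
    static void collectFieldTypes(String parent, DBObject doc, Map<String, Class<?>> fieldTypes) {
        for (String field : doc.keySet()) {
            Object value = doc.get(field);
            if (value == null) {
                continue;
            }
            String name = (parent == null ? "" : parent + ".") + field;
            if (value instanceof BasicDBList) {
                fieldTypes.put(name, List.class);
            } else if (value instanceof BasicDBObject) {
                collectFieldTypes(name, (DBObject) value, fieldTypes);
            } else {
                fieldTypes.put(name, value.getClass());
            }
        }
    }

    public static void main(String[] args) {
        BasicDBList tags = new BasicDBList();
        tags.add("example");
        DBObject doc = new BasicDBObject("name", "Ann")
                .append("address", new BasicDBObject("city", "Oslo"))
                .append("tags", tags);
        Map<String, Class<?>> fieldTypes = new HashMap<String, Class<?>>();
        collectFieldTypes(null, doc, fieldTypes);
        // Prints something like: {name=class java.lang.String, address.city=class java.lang.String, tags=interface java.util.List}
        System.out.println(fieldTypes);
    }
}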