List of usage examples for com.mongodb BasicDBObject get
public Object get(final String key)
From source file:com.ikanow.infinit.e.api.knowledge.output.RssOutput.java
License:Open Source License
public String getDocs(ResponsePojo rp) {
    // Create the feed using Rome
    SyndFeed feed = new SyndFeedImpl(); // create the feed
    String feedType = "rss_2.0";

    // Setup a list of feeds
    @SuppressWarnings("unchecked")
    List<BasicDBObject> docs = (List<BasicDBObject>) rp.getData();

    // Set the title of the feed
    feed.setTitle("Infinit.e Knowledge Discovery RSS Feed");
    feed.setDescription("Infinit.e Search Results RSS Feed");
    feed.setLanguage("en-us");
    feed.setPublishedDate(new Date(System.currentTimeMillis()));
    feed.setFeedType(feedType); // set the type of your feed
    feed.setLink("http://www.ikanow.com");

    // Establish the list to contain the feeds
    List<SyndEntry> entries = new ArrayList<SyndEntry>();

    // loop through the result set
    for (BasicDBObject fdbo : docs) {
        SyndEntry entry = new SyndEntryImpl(); // create a feed entry
        if (fdbo.getString("title") != null) {
            entry.setTitle(fdbo.getString("title"));
            Date pubDate = (Date) fdbo.get("publishedDate");
            if (pubDate != null)
                entry.setPublishedDate(pubDate);
            if (fdbo.getString("url") != null)
                entry.setLink(fdbo.getString("url"));
            if (fdbo.getString("description") != null) {
                // Create the content for the entry
                SyndContent content = new SyndContentImpl(); // create the content of your entry
                content.setType("text/plain");
                content.setValue(fdbo.getString("description"));
                entry.setDescription(content);
            }
            entries.add(entry);
        }
    }
    feed.setEntries(entries); // you can add multiple entries in your feed

    SyndFeedOutput output = new SyndFeedOutput();
    String rss = null;
    try {
        rss = output.outputString(feed);
    } catch (FeedException e) {
        e.printStackTrace();
        logger.error("Line: [" + e.getStackTrace()[2].getLineNumber() + "] " + e.getMessage());
    }
    return rss;
}
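A minimal, self-contained sketch (not part of the original source) of the access pattern this example relies on: getString() for text fields, and a cast of the raw get() result for non-string values such as the published date. The field names used here are illustrative.

import com.mongodb.BasicDBObject;
import java.util.Date;

public class RssFieldAccessSketch {
    public static void main(String[] args) {
        // Build a document shaped like the ones the query layer hands back
        BasicDBObject fdbo = new BasicDBObject("title", "Example article")
                .append("publishedDate", new Date())
                .append("url", "http://example.com/article");

        String title = fdbo.getString("title");          // convenience accessor, null if missing
        Date pubDate = (Date) fdbo.get("publishedDate"); // get() returns Object, so cast to Date
        String url = fdbo.getString("url");

        System.out.println(title + " (" + pubDate + ") " + url);
    }
}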
From source file:com.ikanow.infinit.e.api.knowledge.processing.ScoringUtils.java
License:Open Source License
@SuppressWarnings("unchecked") private void stage1_initialCountingLoop(DBCursor docs, AdvancedQueryPojo.QueryScorePojo scoreParams, int toReturn, StatisticsPojo scores, LinkedList<BasicDBObject> standaloneEventsReturn, int nCommunities) { double s0_nQuerySubsetDocCountInv = 1.0 / (double) _s0_nQuerySubsetDocCount; // Some memory management: DBCollection dbc = MongoDbManager.getDocument().getMetadata(); DBDecoderFactory defaultDecoder = dbc.getDBDecoderFactory(); try {// w w w . jav a 2 s .c o m SizeReportingBasicBSONDecoder sizeReportingDecoder = new SizeReportingBasicBSONDecoder(); dbc.setDBDecoderFactory(sizeReportingDecoder); long currMemUsage = 0; int ndocs = 0; long lastBatch = 0L; long initialUnusedMemory = Runtime.getRuntime().maxMemory() - Runtime.getRuntime().totalMemory(); long initialFreeMemory = Runtime.getRuntime().freeMemory(); for (DBObject f0 : docs) { BasicDBObject f = (BasicDBObject) f0; long newMemUsage = sizeReportingDecoder.getSize(); if ((newMemUsage - currMemUsage) > 0) { // check every batch long now = new Date().getTime(); //DEBUG //logger.warn(ndocs + " : " + (now - lastBatch) + " : " + newMemUsage + " VS " + Runtime.getRuntime().maxMemory() + " UNUSED " + (Runtime.getRuntime().maxMemory() - Runtime.getRuntime().totalMemory()) + " FREE " + Runtime.getRuntime().freeMemory()); // Check vs total memory: long runtimeMem = Runtime.getRuntime().maxMemory(); // note newMemUsage is the input memory ... gets expanded ~6x by the BSON-ification, allowed at most 1/4rd of memory... // Also if we're taking more than 20s for a batch then limp over the limit and exit... if (((newMemUsage * 24) > runtimeMem) || (((now - lastBatch) > 20000L) && (ndocs >= toReturn))) { long finalUnusedMemory = Runtime.getRuntime().maxMemory() - Runtime.getRuntime().totalMemory(); long finalFreeMemory = Runtime.getRuntime().freeMemory(); logger.error("Query truncated memUsage=" + newMemUsage + ", memory=" + runtimeMem + ", docs=" + ndocs + ", totaldocs=" + scores.found + ", init_free_mem=" + initialFreeMemory + ", end_free_mem=" + finalFreeMemory + ", init_unused_mem=" + initialUnusedMemory + ", end_unused_mem=" + finalUnusedMemory); break; } //TESTED currMemUsage = newMemUsage; lastBatch = now; } //TESTED ndocs++; // Simple handling for standalone events if ((null != _s0_standaloneEventAggregator) && !_s0_bNeedToCalcSig) { //if _s0_bNeedToCalcSig then do this elsewhere ScoringUtils_Associations.addStandaloneEvents(f, 0.0, 0, _s0_standaloneEventAggregator, _s0_bEntityTypeFilterPositive, _s0_bAssocVerbFilterPositive, _s0_entityTypeFilter, _s0_assocVerbFilter, _s0_bEvents, _s0_bSummaries, _s0_bFacts); } //TESTED if (!_s0_bNeedToCalcSig) { continue; } //TESTED if (nCommunities > 1) { // (could have pan-community entities) ObjectId communityId = (ObjectId) f.get(DocumentPojo.communityId_); if (null != communityId) { // (have big problems if so, but anyway!) 
int retval = _s0_multiCommunityHandler.community_getIdAndInitialize(communityId, _s1_entitiesInDataset); // (returns an int community id but also sets it into the cache, so just use that below) if (Integer.MIN_VALUE == retval) { //this document cannot be viewed from within this set of communities continue; } } } //TESTED TempDocBucket docBucket = new TempDocBucket(); docBucket.dbo = f; ObjectId id = (ObjectId) f.get(DocumentPojo._id_); // If we're going to weight relevance in, or we need the geo temporal decay: if ((0 != scoreParams.relWeight) || (null != scoreParams.timeProx) || (null != scoreParams.geoProx)) { StatisticsPojo.Score scoreObj = scores.getScore().get(id); if (null != scoreObj) { docBucket.explain = scoreObj.explain; // (will normally be null) docBucket.luceneScore = scoreObj.score; if ((null != scoreParams.timeProx) || (null != scoreParams.geoProx)) { if (scoreObj.decay >= 0.0) { docBucket.geoTemporalDecay = scoreObj.decay; } // (see also below for low accuracy geo scoring) } } else { docBucket.luceneScore = 1.0; } } //TESTED else if (this._s0_sortingByDate) { StatisticsPojo.Score scoreObj = scores.getScore().get(id); if (null != scoreObj) { docBucket.nLuceneIndex = scoreObj.nIndex; } } docBucket.manualWeighting = this.getManualScoreWeights(scoreParams, f); BasicDBList l = (BasicDBList) (f.get(DocumentPojo.entities_)); if (null != l) { long nEntsInDoc = l.size(); double dBestGeoScore = 0.0; // (for low accuracy geo only) for (Iterator<?> e0 = l.iterator(); e0.hasNext();) { BasicDBObject e = (BasicDBObject) e0.next(); BasicDBObject tmpGeotag = null; if (_s3_bLowAccuracyGeo || (null != _s1_dManualGeoDecay_latLonInvdecay)) { // low accuracy geo, need to look for geotag tmpGeotag = (BasicDBObject) e.get(EntityPojo.geotag_); } // Get attributes double freq = -1.0; long ntotaldoccount = -1; String entity_index; Double sentiment = null; try { sentiment = (Double) e.get(EntityPojo.sentiment_); ntotaldoccount = e.getLong(EntityPojo.doccount_); freq = e.getDouble(EntityPojo.frequency_); entity_index = e.getString(EntityPojo.index_); if (null == entity_index) { // Just bypass the entity e.put(EntityPojo.significance_, 0.0); nEntsInDoc--; continue; } } catch (Exception ex) { try { String sfreq; if (ntotaldoccount < 0) { sfreq = e.getString(EntityPojo.doccount_); ntotaldoccount = Long.valueOf(sfreq); } if (freq < -0.5) { sfreq = e.getString(EntityPojo.frequency_); freq = Long.valueOf(sfreq).doubleValue(); } entity_index = e.getString(EntityPojo.index_); if (null == entity_index) { // Just bypass the entity e.put(EntityPojo.significance_, 0.0); nEntsInDoc--; continue; } } catch (Exception e2) { // Just bypass the entity e.put(EntityPojo.significance_, 0.0); nEntsInDoc--; continue; } } //TESTED // First loop through is just counting // Retrieve entity (create/initialzie if necessary) EntSigHolder shp = _s1_entitiesInDataset.get(entity_index); if (null == shp) { if (ntotaldoccount > (long) _s0_globalDocCount) { // obviously can't have more entities-in-dos than docs... 
ntotaldoccount = (long) _s0_globalDocCount; } shp = new EntSigHolder(entity_index, ntotaldoccount, _s0_multiCommunityHandler); // Stage 1a alias handling: set up infrastructure, calculate doc overlap if (null != _s1_aliasLookup) { stage1_initAlias(shp); } if ((null != shp.aliasInfo) && (null == shp.masterAliasSH)) { // this is the discard alias nEntsInDoc--; continue; } //TESTED // Check if entity is in type filter list if (null != _s0_entityTypeFilter) { String entType = null; if (null != shp.aliasInfo) { entType = shp.aliasInfo.getType(); } else { entType = e.getString(EntityPojo.type_); } if (_s0_bEntityTypeFilterPositive) { if ((null != entType) && !_s0_entityTypeFilter.contains(entType.toLowerCase())) { nEntsInDoc--; continue; } } else if ((null != entType) && _s0_entityTypeFilter.contains(entType.toLowerCase())) { //(negative filter) nEntsInDoc--; continue; } } //TESTED (end entity filter) // Geo: if (null != shp.aliasInfo) { if (null != shp.aliasInfo.getGeotag()) { //Geo, overwrite/create tmpGeotag if (_s3_bLowAccuracyGeo || _s3_bExtraAliasGeo || (null != _s1_dManualGeoDecay_latLonInvdecay)) { // Always capture alias geo, even if not in low accuracy mode because we add it to the // legitimate geo: if ((_s3_bLowAccuracyGeo || _s3_bExtraAliasGeo) && (null == _s3_geoBuckets)) { // Initialize the buckets if this is for aggregation not just decay _s3_geoBuckets = (LinkedList<EntSigHolder>[]) new LinkedList[_s3_nGEO_BUCKETS]; } if (null == tmpGeotag) { tmpGeotag = new BasicDBObject(); } tmpGeotag.put(GeoPojo.lat_, shp.aliasInfo.getGeotag().lat); tmpGeotag.put(GeoPojo.lon_, shp.aliasInfo.getGeotag().lon); if (null != shp.aliasInfo.getOntology_type()) { e.put(EntityPojo.ontology_type_, shp.aliasInfo.getOntology_type()); } } } } //TESTED (end geo for aggregation or decay) _s1_entitiesInDataset.put(entity_index, shp); // end Stage 1a alias handling } //(end if is alias) // Stage 1b alias handling: calculate document counts (taking overlaps into account) if (null != shp.masterAliasSH) { // Counts: shp.masterAliasSH.nTotalDocCount++; // docs including overlaps shp.masterAliasSH.avgFreqOverQuerySubset += freq; // Keep track of overlaps: if (f != shp.masterAliasSH.unusedDbo) { shp.masterAliasSH.unusedDbo = f; // (note this is only used in stage 1, alias.unusedDbo is re-used differently in stage 3/4) shp.masterAliasSH.nDocCountInQuerySubset++; // non-overlapping docs ie < shp.nDocCountInQuerySubset } // Sentiment: shp.masterAliasSH.positiveSentiment += shp.positiveSentiment; shp.masterAliasSH.negativeSentiment += shp.negativeSentiment; if (null != sentiment) { shp.masterAliasSH.nTotalSentimentValues++; } } //TESTED (end if is alias) // end Stage 1b // Pan-community logic (this needs to be before the entity object is updated) if (_s0_multiCommunityHandler.isActive()) { _s0_multiCommunityHandler.community_updateCorrelations(shp, ntotaldoccount, entity_index); } else { // (Once we've started multi-community logic, this is no longer desirable) if ((ntotaldoccount > shp.nTotalDocCount) && (ntotaldoccount <= _s0_globalDocCount)) { shp.nTotalDocCount = ntotaldoccount; } //(note there used to be some cases where we adjusted for dc/tf==0, but the // underlying issue in the data model that caused this has been fixed, so it's // now a pathological case that can be ignored) } //(TESTED) // Update counts: _s1_sumFreqInQuerySubset += freq; shp.avgFreqOverQuerySubset += freq; shp.nDocCountInQuerySubset++; shp.decayedDocCountInQuerySubset += docBucket.geoTemporalDecay; // (note this doesn't handle low accuracy 
geo-decay ... we'll address that via a separate term) TempEntityInDocBucket entBucket = new TempEntityInDocBucket(); entBucket.dbo = e; entBucket.freq = freq; entBucket.doc = docBucket; shp.entityInstances.add(entBucket); if (null != tmpGeotag) { // (only needed for low accuracy geo aggregation) if ((_s3_bLowAccuracyGeo || _s3_bExtraAliasGeo) && (null == shp.geotag)) { // (first time for shp only) shp.geotag = tmpGeotag; shp.geotaggedEntity = e; // (ie for onto type, which has been overwritten in the alias case...) } if (null != _s1_dManualGeoDecay_latLonInvdecay) { // Emulate scripted Lucene calculations double minlat = tmpGeotag.getDouble(GeoPojo.lat_); double minlon = tmpGeotag.getDouble(GeoPojo.lon_); double paramlat = _s1_dManualGeoDecay_latLonInvdecay[0]; double paramlon = _s1_dManualGeoDecay_latLonInvdecay[1]; double gdecay = _s1_dManualGeoDecay_latLonInvdecay[2]; char ontCode = GeoOntologyMapping .encodeOntologyCode(e.getString(EntityPojo.ontology_type_)); double dDecay = QueryDecayScript.getGeoDecay(minlat, minlon, paramlat, paramlon, gdecay, ontCode); if (dDecay > dBestGeoScore) { dBestGeoScore = dDecay; } } //TESTED } //(end if entity has geo and need to process entity geo) if (freq > shp.maxFreq) { shp.maxFreq = freq; } // Sentiment: if ((null != sentiment) && (Math.abs(sentiment) <= 1.1)) { // (actually 1.0) shp.nTotalSentimentValues++; if (sentiment > 0.0) { shp.positiveSentiment += sentiment; } else { shp.negativeSentiment += sentiment; } } else if (null != sentiment) { // corrupt sentiment for some reason?! e.put(EntityPojo.sentiment_, null); } docBucket.docLength += freq; } //(end loop over entities) docBucket.nLeftToProcess = nEntsInDoc; docBucket.nEntsInDoc = (int) nEntsInDoc; if (null != this._s1_dManualGeoDecay_latLonInvdecay) { // Low accuracy geo-calculations docBucket.geoTemporalDecay *= dBestGeoScore; docBucket.luceneScore *= dBestGeoScore; _s2_dAvgLowAccuracyGeoDecay += dBestGeoScore * s0_nQuerySubsetDocCountInv; } //TESTED } // (end if feed has entities) // Handle documents with no entities - can still promote them if (0 == docBucket.nLeftToProcess) { // (use this rather than doc length in case all the entities had freq 0) _s1_noEntityBuckets.add(docBucket); } } // (end loop over feeds) //TESTED } finally { dbc.setDBDecoderFactory(defaultDecoder); } }
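A reduced sketch (illustrative, not taken from the original class) of the nested-read pattern used throughout the counting loop above: get() cast to BasicDBList for the entity array, each element cast to BasicDBObject, typed accessors first, with a string-parse fallback when the stored type is inconsistent. The plain string keys stand in for the DocumentPojo/EntityPojo constants.

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class EntityCountingSketch {
    public static void main(String[] args) {
        BasicDBObject entity = new BasicDBObject("index", "barack obama/person")
                .append("frequency", "3")   // deliberately stored as a string
                .append("doccount", 42L);
        BasicDBList entities = new BasicDBList();
        entities.add(entity);
        BasicDBObject doc = new BasicDBObject("entities", entities);

        BasicDBList l = (BasicDBList) doc.get("entities"); // raw get(), cast to the list type
        if (null != l) {
            for (Object e0 : l) {
                BasicDBObject e = (BasicDBObject) e0;
                double freq;
                try {
                    freq = e.getDouble("frequency");                 // works when stored as a number
                } catch (Exception ex) {
                    freq = Double.valueOf(e.getString("frequency")); // fallback: stored as a string
                }
                long doccount = e.getLong("doccount");
                System.out.println(e.getString("index") + ": freq=" + freq + ", doccount=" + doccount);
            }
        }
    }
}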
From source file:com.ikanow.infinit.e.api.knowledge.processing.ScoringUtils.java
License:Open Source License
private void stage4_prepareDocsForOutput(AdvancedQueryPojo.QueryScorePojo scoreParams, StatisticsPojo scores, long nToClientLimit, LinkedList<BasicDBObject> returnList) { // Get the documents long nDocs = 0; double dBestScore = 0.0; double dAvgScore = 0.0; double dSigFactor = 100.0 / (_s3_dSigScalingFactor * _s2_dApproxAverageDocumentSig); double dRelFactor = 100.0 / (_s3_dLuceneScalingFactor * _s0_avgLuceneScore); // Start at the bottom of the list, so don't need to worry about skipping documents, just count out from the bottom // The call to stage3_calculateTFTerms with nStart+nToClientLimit handles the rest Iterator<TempDocBucket> pqIt = _s3_pqDocs.iterator(); while (pqIt.hasNext() && (nDocs < nToClientLimit)) { TempDocBucket qsf = pqIt.next(); nDocs++;//from w ww . j a v a 2s . c o m if (!_s0_sortingByDate) { dBestScore = qsf.totalScore; } dAvgScore += dBestScore; BasicDBObject f = qsf.dbo; // Phase "0" - these are the highest prio events boolean bNeedToFilterAndAliasAssoc_event = true; boolean bNeedToFilterAndAliasAssoc_fact = true; boolean bNeedToFilterAndAliasAssoc_summary = true; if (null != _s0_standaloneEventAggregator) { ScoringUtils_Associations.addStandaloneEvents(qsf.dbo, qsf.aggSignificance, 0, _s0_standaloneEventAggregator, _s0_bEntityTypeFilterPositive, _s0_bAssocVerbFilterPositive, _s0_entityTypeFilter, _s0_assocVerbFilter, _s0_bEvents, _s0_bSummaries, _s0_bFacts); bNeedToFilterAndAliasAssoc_event = false; bNeedToFilterAndAliasAssoc_fact = false; bNeedToFilterAndAliasAssoc_summary = false; } //TESTED if (null != _s0_lowAccuracyAssociationAggregator_events) { ScoringUtils_Associations.addStandaloneEvents(qsf.dbo, qsf.aggSignificance, 0, _s0_lowAccuracyAssociationAggregator_events, _s0_bEntityTypeFilterPositive, _s0_bAssocVerbFilterPositive, _s0_entityTypeFilter, _s0_assocVerbFilter, true, false, false); bNeedToFilterAndAliasAssoc_event = false; } //TESTED if (null != _s0_lowAccuracyAssociationAggregator_facts) { ScoringUtils_Associations.addStandaloneEvents(qsf.dbo, qsf.aggSignificance, 0, _s0_lowAccuracyAssociationAggregator_facts, _s0_bEntityTypeFilterPositive, _s0_bAssocVerbFilterPositive, _s0_entityTypeFilter, _s0_assocVerbFilter, false, false, true); bNeedToFilterAndAliasAssoc_fact = false; } //TESTED try { DocumentPojoApiMap.mapToApi(f); // Handle deduplication/multi-community code: if (null != qsf.dupList) { try { ScoringUtils_MultiCommunity.community_combineDuplicateDocs(qsf); } catch (Exception e) { // Do nothing, just carry on with minimal damage! } } // Scoring: double d = qsf.aggSignificance * dSigFactor; if (Double.isNaN(d)) { f.put(DocumentPojo.aggregateSignif_, 0.0); } else { f.put(DocumentPojo.aggregateSignif_, d); } d = qsf.luceneScore * dRelFactor; if (Double.isNaN(d)) { f.put(DocumentPojo.queryRelevance_, 0.0); } else { f.put(DocumentPojo.queryRelevance_, d); } if (!_s0_sortingByDate) { f.put(DocumentPojo.score_, qsf.totalScore); } BasicDBList l = (BasicDBList) (f.get(DocumentPojo.entities_)); // Handle update ids vs normal ids: ObjectId updateId = (ObjectId) f.get(DocumentPojo.updateId_); if (null != updateId) { // swap the 2... 
f.put(DocumentPojo.updateId_, f.get(DocumentPojo._id_)); f.put(DocumentPojo._id_, updateId); } // Check if entities enabled if ((null != l) && (!_s0_bGeoEnts && !_s0_bNonGeoEnts)) { f.removeField(DocumentPojo.entities_); l = null; } //TESTED // Check if events etc enabled if ((!_s0_bEvents && !_s0_bFacts && !_s0_bSummaries)) { f.removeField(DocumentPojo.associations_); } //TESTED else if (!_s0_bEvents || !_s0_bFacts || !_s0_bSummaries || (null != _s0_assocVerbFilter)) { // Keep only specified event_types BasicDBList lev = (BasicDBList) (f.get(DocumentPojo.associations_)); if (null != lev) { for (Iterator<?> e0 = lev.iterator(); e0.hasNext();) { BasicDBObject e = (BasicDBObject) e0.next(); // Type filter boolean bNeedToFilterAndAliasAssoc = true; String sEvType = e.getString(AssociationPojo.assoc_type_); boolean bKeep = true; if (null == sEvType) { bKeep = false; } else if (sEvType.equalsIgnoreCase("event")) { if (!_s0_bEvents) bKeep = false; bNeedToFilterAndAliasAssoc = bNeedToFilterAndAliasAssoc_event; } else if (sEvType.equalsIgnoreCase("fact")) { if (!_s0_bFacts) bKeep = false; bNeedToFilterAndAliasAssoc = bNeedToFilterAndAliasAssoc_fact; } else if (sEvType.equalsIgnoreCase("summary")) { if (!_s0_bSummaries) bKeep = false; bNeedToFilterAndAliasAssoc = bNeedToFilterAndAliasAssoc_summary; } if (!bKeep) { e0.remove(); } else { // Type matches, now for some more complex logic.... if (bNeedToFilterAndAliasAssoc) { // (otherwise done already) bKeep = ScoringUtils_Associations.filterAndAliasAssociation(e, _s1_aliasLookup, true, _s0_bEntityTypeFilterPositive, _s0_bAssocVerbFilterPositive, _s0_entityTypeFilter, _s0_assocVerbFilter); if (!bKeep) { e0.remove(); } } //TESTED } //(end output filter logic) } // (end loop over events) } // (end if this doc has events) } //TESTED // Check if metadata is enabled if (!_s0_bMetadata) { f.removeField(DocumentPojo.metadata_); } //TESTED if (null != l) { for (Iterator<?> e0 = l.iterator(); e0.hasNext();) { BasicDBObject e = (BasicDBObject) e0.next(); if (!_s0_bNonGeoEnts) { // then must only be getting geo (else wouldn't be in this loop) if (null == e.get(EntityPojo.geotag_)) { e0.remove(); continue; } } String entity_index = e.getString(EntityPojo.index_); if (null == entity_index) continue; EntSigHolder shp = (EntSigHolder) _s1_entitiesInDataset.get(entity_index); if (null != shp) { // Stage 4x: alias processing, just overwrite // (note don't delete "duplicate entities", hard-to-be-globally-consistent // and will potentially throw data away which might be undesirable) if (null != shp.masterAliasSH) { shp = shp.masterAliasSH; // (already has all the aggregated values used below) if (!entity_index.equals(shp.aliasInfo.getIndex())) { e.put(EntityPojo.index_, shp.aliasInfo.getIndex()); e.put(EntityPojo.disambiguated_name_, shp.aliasInfo.getDisambiguatedName()); e.put(EntityPojo.type_, shp.aliasInfo.getType()); e.put(EntityPojo.dimension_, shp.aliasInfo.getDimension()); if (null != shp.aliasInfo.getGeotag()) { BasicDBObject aliasedGeoTag = new BasicDBObject(); aliasedGeoTag.put(GeoPojo.lat_, shp.aliasInfo.getGeotag().lat); aliasedGeoTag.put(GeoPojo.lon_, shp.aliasInfo.getGeotag().lon); e.put(EntityPojo.geotag_, aliasedGeoTag); if (null != shp.aliasInfo.getOntology_type()) { e.put(EntityPojo.ontology_type_, shp.aliasInfo.getOntology_type()); } } //TESTED } } //TESTED // end Stage 4x of alias processing double dataSig = shp.datasetSignificance; if (Double.isNaN(dataSig)) { e.put(EntityPojo.datasetSignificance_, 0.0); } else { 
e.put(EntityPojo.datasetSignificance_, dataSig); } e.put(EntityPojo.queryCoverage_, shp.queryCoverage); e.put(EntityPojo.averageFreq_, shp.avgFreqOverQuerySubset); if (shp.nTotalSentimentValues > 0) { e.put(EntityPojo.positiveSentiment_, shp.positiveSentiment); e.put(EntityPojo.negativeSentiment_, shp.negativeSentiment); e.put(EntityPojo.sentimentCount_, shp.nTotalSentimentValues); } } else { // (most likely to occur if the entity is discarded (alias/filter) or is corrupt in some way) e0.remove(); continue; } } //(end loop over entities) } // (end if feed has entities) //TESTED // Explain if enabled if (null != qsf.explain) { f.put(DocumentPojo.explain_, qsf.explain); } // Add to the end of the list (so will come back from API call in natural order, highest first) returnList.addFirst(f); // (add elements to the front of the list so that the top of the list is ordered by priority) } catch (Exception e) { // Probably a JSON error, just carry on String title = f.getString(DocumentPojo.title_); logger.error(title + ": " + e.getMessage()); } } // (end loop over feeds) //TESTED // Update the scores: scores.maxScore = (float) dBestScore; if (nDocs > 0) { scores.avgScore = (float) dAvgScore / nDocs; } }
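A small sketch (assumed string keys rather than the original pojo constants) of the get/put/removeField pattern used when shaping documents for output above: swapping _id and updateId, overwriting a computed score, and dropping sections the caller did not request.

import com.mongodb.BasicDBObject;
import org.bson.types.ObjectId;

public class OutputShapingSketch {
    public static void main(String[] args) {
        BasicDBObject f = new BasicDBObject("_id", new ObjectId())
                .append("updateId", new ObjectId())
                .append("metadata", new BasicDBObject("raw", "..."));

        // Swap the two ids, as the scoring code does for updated documents
        ObjectId updateId = (ObjectId) f.get("updateId");
        if (null != updateId) {
            f.put("updateId", f.get("_id"));
            f.put("_id", updateId);
        }

        // Overwrite a computed score, guarding against NaN
        double score = Double.NaN;
        f.put("aggregateSignif", Double.isNaN(score) ? 0.0 : score);

        // Drop a section that was not requested
        f.removeField("metadata");

        System.out.println(f);
    }
}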
From source file:com.ikanow.infinit.e.api.knowledge.processing.ScoringUtils.java
License:Open Source License
private void stage4_prepareEntsForOutput(LinkedList<BasicDBObject> entityReturn) { if (_s0_nNumEntsReturn > 0) { // (else entities not enabled) for (EntSigHolder qsf = _s3_pqEnt.poll(); null != qsf; qsf = _s3_pqEnt.poll()) // (start with lowest ranking) {/* w ww . jav a 2 s .c om*/ BasicDBObject ent = qsf.unusedDbo; if (null == ent) { int nTries = 0; if (null != qsf.entityInstances) { // (should never be null but just to be on the safe side... for (TempEntityInDocBucket tefb : qsf.entityInstances) { // (Try to find an entity that wasn't promoted ie can now be re-used // if we can't find one quite quickly then bail out and we'll pay the cost of cloning it) if (!tefb.doc.bPromoted) { ent = tefb.dbo; break; } else if (++nTries > 10) { break; } } if (null == ent) { ent = qsf.entityInstances.get(0).dbo; } } else { // (no entityInstances, something alias-related has gone wrong, just skip) continue; } } //TESTED qsf.entityInstances = null; // (don't need this any more, can be gc'd) try { if (null != qsf.aliasInfo) { if (!qsf.index.equals(qsf.aliasInfo.getIndex())) { ent.put(EntityPojo.index_, qsf.aliasInfo.getIndex()); ent.put(EntityPojo.disambiguated_name_, qsf.aliasInfo.getDisambiguatedName()); ent.put(EntityPojo.type_, qsf.aliasInfo.getType()); ent.put(EntityPojo.dimension_, qsf.aliasInfo.getDimension()); if (null != qsf.aliasInfo.getGeotag()) { BasicDBObject aliasedGeoTag = new BasicDBObject(); aliasedGeoTag.put(GeoPojo.lat_, qsf.aliasInfo.getGeotag().lat); aliasedGeoTag.put(GeoPojo.lon_, qsf.aliasInfo.getGeotag().lon); ent.put(EntityPojo.geotag_, aliasedGeoTag); if (null != qsf.aliasInfo.getOntology_type()) { ent.put(EntityPojo.ontology_type_, qsf.aliasInfo.getOntology_type()); } } //TESTED } } //TESTED if (null == ent.get(EntityPojo.datasetSignificance_)) { // Not getting promoted so need to add fields... if (Double.isNaN(qsf.datasetSignificance)) { ent.put("datasetSignificance", 0.0); } else { ent.put(EntityPojo.datasetSignificance_, qsf.datasetSignificance); } ent.put(EntityPojo.queryCoverage_, qsf.queryCoverage); ent.put(EntityPojo.averageFreq_, qsf.avgFreqOverQuerySubset); if (qsf.nTotalSentimentValues > 0) { ent.put(EntityPojo.positiveSentiment_, qsf.positiveSentiment); ent.put(EntityPojo.negativeSentiment_, qsf.negativeSentiment); ent.put(EntityPojo.sentimentCount_, qsf.nTotalSentimentValues); } } else { // (... but can just use it without cloning) BasicDBObject ent2 = new BasicDBObject(); for (Map.Entry<String, Object> kv : ent.entrySet()) { ent2.append(kv.getKey(), kv.getValue()); } ent = ent2; } ent.removeField(EntityPojo.relevance_); if (Double.isNaN(qsf.maxDocSig)) { ent.put(EntityPojo.significance_, 0.0); } else { ent.put(EntityPojo.significance_, qsf.maxDocSig); } ent.put(EntityPojo.frequency_, (long) qsf.maxFreq); entityReturn.addFirst(ent); } catch (Exception e) { // Probably a JSON error, just carry on String title = ent.getString(EntityPojo.index_); logger.error(title + ": " + e.getMessage()); } //TESTED } } //TESTED }
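A sketch (hypothetical keys) of two patterns from this method: testing get() for null to see whether an entity object has already been decorated, and shallow-copying a shared BasicDBObject via entrySet()/append() so the cached instance is not mutated.

import com.mongodb.BasicDBObject;
import java.util.Map;

public class EntityOutputSketch {
    public static void main(String[] args) {
        BasicDBObject ent = new BasicDBObject("index", "london/city")
                .append("datasetSignificance", 12.5);

        if (null == ent.get("datasetSignificance")) {
            // Field absent: safe to decorate this instance in place
            ent.put("datasetSignificance", 0.0);
        } else {
            // Field present: the object is shared, so work on a shallow copy instead
            BasicDBObject ent2 = new BasicDBObject();
            for (Map.Entry<String, Object> kv : ent.entrySet()) {
                ent2.append(kv.getKey(), kv.getValue());
            }
            ent = ent2;
        }
        ent.removeField("relevance"); // harmless if the field does not exist
        System.out.println(ent);
    }
}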
From source file:com.ikanow.infinit.e.api.knowledge.processing.ScoringUtils.java
License:Open Source License
private double getManualScoreWeights(AdvancedQueryPojo.QueryScorePojo scoreParams, BasicDBObject doc) {
    // Highest prio: source key weight
    if (null != scoreParams.sourceWeights) {
        String sourceKey = DocumentPojo.getSourceKey(doc.getString(DocumentPojo.sourceKey_));
        Double dWeight = scoreParams.sourceWeights.get(sourceKey);
        if (null != dWeight) {
            return dWeight;
        }
    }
    // Middle prio: type
    if (null != scoreParams.typeWeights) {
        String mediaType = doc.getString(DocumentPojo.mediaType_);
        Double dWeight = scoreParams.typeWeights.get(mediaType);
        if (null != dWeight) {
            return dWeight;
        }
    }
    // Lowest prio: average of tags
    if (null != scoreParams.tagWeights) {
        double dScore = 0.0;
        int nComps = 0;
        BasicDBList tags = (BasicDBList) doc.get(DocumentPojo.tags_);
        if (null != tags) {
            for (Object tagObj : tags) {
                String tag = (String) tagObj;
                Double dWeight = scoreParams.tagWeights.get(tag);
                if (null != dWeight) {
                    nComps++;
                    dScore += dWeight;
                }
            }
            if (nComps > 0) {
                return dScore / nComps;
            }
        }
    }
    return 1.0;
}
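A compact sketch (illustrative field names and weights) of the tag-averaging branch above: get() cast to BasicDBList, each element treated as a String, matching weights summed and averaged, with 1.0 as the default when nothing matches.

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import java.util.HashMap;
import java.util.Map;

public class TagWeightSketch {
    public static void main(String[] args) {
        BasicDBList tags = new BasicDBList();
        tags.add("news");
        tags.add("finance");
        BasicDBObject doc = new BasicDBObject("tags", tags);

        Map<String, Double> tagWeights = new HashMap<String, Double>();
        tagWeights.put("news", 2.0);

        double dScore = 0.0;
        int nComps = 0;
        BasicDBList docTags = (BasicDBList) doc.get("tags"); // raw get(), cast to list
        if (null != docTags) {
            for (Object tagObj : docTags) {
                Double dWeight = tagWeights.get((String) tagObj);
                if (null != dWeight) {
                    nComps++;
                    dScore += dWeight;
                }
            }
        }
        System.out.println(nComps > 0 ? dScore / nComps : 1.0); // default weight is 1.0
    }
}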
From source file:com.ikanow.infinit.e.api.knowledge.processing.ScoringUtils_Associations.java
License:Open Source License
static void addStandaloneEvents(BasicDBObject doc, double dDocSig, int nPhase, StandaloneEventHashAggregator standaloneEventAggregator, boolean bEntTypeFilterPositive, boolean bAssocVerbFilterPositive, HashSet<String> entTypeFilter, HashSet<String> assocVerbFilter, boolean bEvents, boolean bSummaries, boolean bFacts) { if (standaloneEventAggregator.bSimulateAggregation) { bSummaries = false;// ww w .ja va 2 s . c om } String sDocIsoPubDate = null; BasicDBList lev = (BasicDBList) (doc.get(DocumentPojo.associations_)); if (null != lev) { for (Iterator<?> e0 = lev.iterator(); e0.hasNext();) { BasicDBObject e = (BasicDBObject) e0.next(); String sEvType = e.getString(AssociationPojo.assoc_type_); boolean bIsFact = false; boolean bIsSummary = false; boolean bKeep = true; if (null == sEvType) { bKeep = false; } else if (sEvType.equalsIgnoreCase("event")) { if (!bEvents) bKeep = false; } else if (sEvType.equalsIgnoreCase("fact")) { if (!bFacts) bKeep = false; bIsFact = true; } else if (sEvType.equalsIgnoreCase("summary")) { if (!bSummaries) bKeep = false; bIsSummary = true; } //TESTED x4 // Filter and aliasing logic: if (bKeep) { boolean bKeep2 = filterAndAliasAssociation(e, standaloneEventAggregator.aliasLookup, true, bEntTypeFilterPositive, bAssocVerbFilterPositive, entTypeFilter, assocVerbFilter); if (!bKeep2) { e0.remove(); // (remove/rename events based on filters where we can, // means we don't have to do it in stage4) bKeep = false; } } //TESTED if (bKeep) { String time_start = null; String time_end = null; // (normally not needed) if (!standaloneEventAggregator.bSimulateAggregation) { //else times are discarded // Add time from document time_start = e.getString(AssociationPojo.time_start_); if (null == time_start) { if (null == sDocIsoPubDate) { // Convert docu pub date to ISO (day granularity): Date pubDate = (Date) doc.get(DocumentPojo.publishedDate_); if (null != pubDate) { SimpleDateFormat f2 = new SimpleDateFormat("yyyy-MM-dd"); time_start = f2.format(pubDate); } } else { time_start = sDocIsoPubDate; // (so it doesn't get added again below) } } //TESTED else { // Remove hourly granularity for consistency time_start = time_start.replaceAll("T.*$", ""); time_end = e.getString(AssociationPojo.time_end_); if (null != time_end) { time_end = time_end.replaceAll("T.*$", ""); } } //TESTED (with debug code, eg time_start = "1997-07-16T19:20:30+01:00") if (null != time_start) { // Ensure it has day granularity, to help with aggregation e.put(AssociationPojo.time_start_, time_start); if (null != time_end) { e.put(AssociationPojo.time_end_, time_end); } } //TESTED } //(end if normal standalone mode, not aggregation simulation) StandaloneEventHashCode evtHolder = new StandaloneEventHashCode( standaloneEventAggregator.bSimulateAggregation, e, bIsSummary, bIsFact); BasicDBObject oldEvt = standaloneEventAggregator.store.get(evtHolder); if (null == oldEvt) { // Doc count (see below) e.put(AssociationPojo.doccount_, 1); double dAssocSig = dDocSig * dDocSig; // Weight down summaries slightly (80%), and summaries with missing entities a lot (50%) if (bIsSummary) { String sEntity2 = (String) e.get(AssociationPojo.entity2_); if (null == sEntity2) { dAssocSig *= 0.50; } else { dAssocSig *= 0.80; } } // Running significance count: e.put(AssociationPojo.assoc_sig_, dAssocSig); // (use sum-squared to score up events that occur frequently) if (dAssocSig > standaloneEventAggregator.dMaxSig) { standaloneEventAggregator.dMaxSig = dAssocSig; } standaloneEventAggregator.store.put(evtHolder, e); // Add to list in 
some sort of very basic order... if (2 == nPhase) { // Put at the back, it's probably really low sig standaloneEventAggregator.tmpList.add(e); } else if (1 == nPhase) { // Put at the front until Phase 0 comes along standaloneEventAggregator.tmpList.addFirst(e); standaloneEventAggregator.nPhase1Events++; } else { // phases 0 and 1 get the higher orderings standaloneEventAggregator.tmpList.addFirst(e); standaloneEventAggregator.nPhase0Events++; } } else { // Update doc count long nDocCount = oldEvt.getInt(AssociationPojo.doccount_, 1) + 1; oldEvt.put(AssociationPojo.doccount_, nDocCount); // Running significance count: double dAssocSig = oldEvt.getDouble(AssociationPojo.doccount_) + dDocSig * dDocSig; oldEvt.put(AssociationPojo.assoc_sig_, dAssocSig); if (dAssocSig / nDocCount > standaloneEventAggregator.dMaxSig) { standaloneEventAggregator.dMaxSig = dAssocSig; } if (bIsFact && !standaloneEventAggregator.bSimulateAggregation) { // For facts, also update the time range: String old_time_start = oldEvt.getString(AssociationPojo.time_start_); String old_time_end = oldEvt.getString(AssociationPojo.time_end_); // Just keep this really simple and inefficient: TreeSet<String> timeOrder = new TreeSet<String>(); if (null != old_time_start) { timeOrder.add(old_time_start); } if (null != old_time_end) { timeOrder.add(old_time_end); } if (null != time_start) { timeOrder.add(time_start); } if (null != time_end) { timeOrder.add(time_end); } if (timeOrder.size() > 1) { Iterator<String> itStart = timeOrder.iterator(); oldEvt.put(AssociationPojo.time_start_, itStart.next()); Iterator<String> itEnd = timeOrder.descendingIterator(); oldEvt.put(AssociationPojo.time_end_, itEnd.next()); } } // end if is fact - treat times different } //TESTED } // (end if keeping this event) } // (end loop over events) } // (end if this doc has events) }
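A trimmed sketch (assumed keys, standing in for the AssociationPojo constants) of the aggregation step above: an already-seen event has its doc count and running significance updated in place using getInt() with a default, getDouble(), and put().

import com.mongodb.BasicDBObject;

public class EventAggregationSketch {
    public static void main(String[] args) {
        // Event object already held in the aggregator's store
        BasicDBObject oldEvt = new BasicDBObject("assoc_type", "fact")
                .append("doccount", 1)
                .append("assoc_sig", 0.25);

        double dDocSig = 0.5;

        // Update doc count, defaulting to 1 if the field were missing
        long nDocCount = oldEvt.getInt("doccount", 1) + 1;
        oldEvt.put("doccount", nDocCount);

        // Running significance: add the squared document significance
        double dAssocSig = oldEvt.getDouble("assoc_sig") + dDocSig * dDocSig;
        oldEvt.put("assoc_sig", dAssocSig);

        System.out.println(oldEvt);
    }
}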
From source file:com.ikanow.infinit.e.api.knowledge.QueryHandler.java
License:Open Source License
private ResponsePojo getSavedQueryInstead(String storedQueryNameOrId, String[] communityIdStrs, AdvancedQueryPojo query) {/*from w w w .j av a 2 s . c o m*/ ResponsePojo rp = null; ObjectId oid = null; BasicDBObject jobQuery = null; try { oid = new ObjectId(storedQueryNameOrId); jobQuery = new BasicDBObject(CustomMapReduceJobPojo._id_, oid); } catch (Exception e) { jobQuery = new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, storedQueryNameOrId); } CustomMapReduceJobPojo savedJob = CustomMapReduceJobPojo .fromDb(DbManager.getCustom().getLookup().findOne(jobQuery), CustomMapReduceJobPojo.class); if (null != savedJob) { // Is this even a saved job? if (null != savedJob.jarURL) { savedJob = null; } } if (null != savedJob) { // Authorization boolean auth = false; String communityIdStrList = Arrays.toString(communityIdStrs); for (ObjectId commId : savedJob.communityIds) { if (communityIdStrList.contains(commId.toString())) { auth = true; break; } } if (!auth) { savedJob = null; } if (null == savedJob) { throw new RuntimeException( "Can't find saved query, or is a custom job not a query, or authorization error"); } // OK go get the results of the job DBCollection coll = DbManager.getCollection(savedJob.getOutputDatabase(), savedJob.outputCollection); BasicDBObject result = (BasicDBObject) coll.findOne(); // (at some point support multiple saved queries) if (null == result) { throw new RuntimeException("Saved query is empty"); } BasicDBObject apiResultToConvert = (BasicDBObject) result.get("value"); if (null == apiResultToConvert) { throw new RuntimeException("Saved query has invalid format"); } rp = ResponsePojo.fromDb(apiResultToConvert); } else if (null != oid) { // Support new user/doc queues SharePojo share = SharePojo.fromDb(DbManager.getSocial().getShare().findOne(jobQuery), SharePojo.class); if ((null == share) || (null == share.getShare()) || (!share.getType().equals(DocumentQueueControlPojo.UserQueue) && !share.getType().equals(DocumentQueueControlPojo.SavedQueryQueue))) { throw new RuntimeException( "Can't find saved query, or is a custom job not a query, or authorization error"); } else { // share.share is a DocumentQueueControlPojo DocumentQueueControlPojo queue = DocumentQueueControlPojo.fromApi(share.getShare(), DocumentQueueControlPojo.class); BasicDBObject docQuery1 = new BasicDBObject(DocumentPojo._id_, new BasicDBObject(DbManager.in_, queue.getQueueList())); BasicDBObject docQuery2 = new BasicDBObject(DocumentPojo.updateId_, new BasicDBObject(DbManager.in_, queue.getQueueList())); BasicDBObject docQuery = new BasicDBObject(DbManager.or_, Arrays.asList(docQuery1, docQuery2)); DBCursor dbc = DbManager.getDocument().getMetadata().find(docQuery).limit(query.score.numAnalyze); ScoringUtils scoreStats = new ScoringUtils(); List<BasicDBObject> docs = null; StatisticsPojo stats = new StatisticsPojo(); stats.setSavedScores(query.output.docs.skip, dbc.count()); try { boolean lockAcquired = true; try { lockAcquired = this.acquireConcurrentAccessLock(); } catch (InterruptedException e) { //(that's fine just carry on) lockAcquired = false; } if (!lockAcquired) { rp.setResponse( new ResponseObject("Query", false, "Query engine busy, please try again later.")); return rp; } scoreStats.setAliasLookupTable(_aliasLookup); docs = scoreStats.calcTFIDFAndFilter(DbManager.getDocument().getMetadata(), dbc, query.score, query.output, stats, false, query.output.docs.skip, query.output.docs.numReturn, communityIdStrs, null, null, null, null, null, null, null, null); } finally { 
scoreStats.clearAsMuchMemoryAsPossible(); this.releaseConcurrentAccessLock(); } rp = new ResponsePojo(); rp.setResponse(new ResponseObject("Query", true, "Saved Query: " + share.getTitle())); rp.setStats(stats); if ((null != docs) && (docs.size() > 0)) { rp.setData(docs, (BasePojoApiMap<BasicDBObject>) null); } else { // (ensure there's always an empty list) docs = new ArrayList<BasicDBObject>(0); rp.setData(docs, (BasePojoApiMap<BasicDBObject>) null); } } //end if user or saved query queue } return rp; }
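A minimal sketch (hypothetical document contents, no live database call) of the map-reduce result unwrapping above: the saved-job output document keeps its payload nested under "value", so the outer result is read with get() and cast to BasicDBObject before conversion.

import com.mongodb.BasicDBObject;

public class SavedQueryResultSketch {
    public static void main(String[] args) {
        // Shape of a map-reduce output document: the payload lives under "value"
        BasicDBObject result = new BasicDBObject("_id", "query1")
                .append("value", new BasicDBObject("stats", new BasicDBObject("found", 123L)));

        BasicDBObject apiResultToConvert = (BasicDBObject) result.get("value");
        if (null == apiResultToConvert) {
            throw new RuntimeException("Saved query has invalid format");
        }
        System.out.println(apiResultToConvert);
    }
}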
From source file:com.ikanow.infinit.e.api.knowledge.SearchHandler.java
License:Open Source License
/**
 * Performs a reverse geolookup, takes a lat/lon and returns a list of nearby
 * locations
 *
 * @param latitude
 * @param longitude
 * @return
 */
private List<SearchSuggestPojo> reverseGeoLookup(Double latitude, Double longitude) {
    List<SearchSuggestPojo> locations = null;
    BasicDBList results = runGeoNear(latitude, longitude);
    if (results != null) {
        locations = new ArrayList<SearchSuggestPojo>();
        if (results.size() > 0) {
            for (int i = 0; i < 10 && i < results.size(); i++) {
                BasicDBObject result = (BasicDBObject) results.get(i);
                Double distance = result.getDouble("dis");
                BasicDBObject obj = (BasicDBObject) result.get("obj");
                locations.add(buildLocation(obj, distance));
            }
        }
    }
    return locations;
}
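A short sketch (fabricated result list rather than a live geoNear call) of how the command output is consumed above: each element of the results list is a BasicDBObject whose "dis" field is the distance and whose "obj" field is the matched location document.

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class GeoNearResultSketch {
    public static void main(String[] args) {
        // Simulated geoNear output: [ { dis: ..., obj: { ... } }, ... ]
        BasicDBObject hit = new BasicDBObject("dis", 0.0123)
                .append("obj", new BasicDBObject("city", "London"));
        BasicDBList results = new BasicDBList();
        results.add(hit);

        for (int i = 0; i < 10 && i < results.size(); i++) {
            BasicDBObject result = (BasicDBObject) results.get(i);
            Double distance = result.getDouble("dis");              // distance from the query point
            BasicDBObject obj = (BasicDBObject) result.get("obj");  // embedded location document
            System.out.println(obj.getString("city") + " @ " + distance);
        }
    }
}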
From source file:com.ikanow.infinit.e.api.social.sharing.ShareHandler.java
License:Open Source License
private String getReferenceString(SharePojo share) { // FILE:// w w w . ja va2 s .c om if (null == share.getDocumentLocation().get_id()) { // local file based reference FileInputStream fin = null; Scanner s = null; try { File f = new File(share.getDocumentLocation().getCollection()); fin = new FileInputStream(f); s = new Scanner(fin, "UTF-8"); return (s.useDelimiter("\n").next()); } catch (Exception e) { return null; } finally { try { if (null != fin) fin.close(); if (null != s) s.close(); } catch (Exception e) { } // (probably just never opened) } } // DB: // Carry on, this is a database object HashSet<String> shareIdStrs = new HashSet<String>(); for (ShareCommunityPojo commIds : share.getCommunities()) { shareIdStrs.add(commIds.get_id().toString()); } String retVal = null; BasicDBObject query = new BasicDBObject(DocumentPojo._id_, share.getDocumentLocation().get_id()); // (same for all artifacts) String dbName = share.getDocumentLocation().getDatabase(); String collectionName = share.getDocumentLocation().getCollection(); BasicDBObject returnVal = (BasicDBObject) MongoDbManager.getCollection(dbName, collectionName) .findOne(query); try { BasicDBList communities = null; boolean bCustomJob = dbName.equals("custommr"); // (a bit different) boolean bFoundOverlap = false; if (!bCustomJob) { ObjectId communityId = (ObjectId) returnVal.get(DocumentPojo.communityId_); // (same for other artifacts) bFoundOverlap = shareIdStrs.contains(communityId.toString()); } else { communities = (BasicDBList) returnVal.get("communityIds"); // (shared across multiple json types) for (Object commIdObj : communities) { ObjectId commId = (ObjectId) commIdObj; if (shareIdStrs.contains(commId.toString())) { bFoundOverlap = true; break; } } } if (!bFoundOverlap) { throw new RuntimeException(""); // (turned into the common message below) } if (!bCustomJob) { // everything but custom jobs Date modifiedTime = returnVal.getDate(DocumentPojo.modified_); // (same for other artifacts) if (null != modifiedTime) { share.setModified(modifiedTime); } retVal = returnVal.toString(); } else { // custom jobs String database = returnVal.getString(CustomMapReduceJobPojo.outputDatabase_); if (null == database) { database = dbName; } Date modifiedTime = returnVal.getDate(CustomMapReduceJobPojo.lastCompletionTime_); if (null != modifiedTime) { share.setModified(modifiedTime); } String collection = returnVal.getString(CustomMapReduceJobPojo.outputCollection_); BasicDBObject returnVal2 = (BasicDBObject) MongoDbManager.getCollection(database, collection) .findOne(); retVal = returnVal2.toString(); } } catch (Exception e) { throw new RuntimeException("Document not found or permission issue (no overlapping communities)"); } return retVal; }
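A sketch (illustrative keys and in-memory objects only) of the community check above: ordinary documents carry a single communityId read as an ObjectId, while custom-job records carry a communityIds array read as a BasicDBList, and the reference is only returned if one of them overlaps the caller's communities.

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import org.bson.types.ObjectId;
import java.util.HashSet;

public class CommunityOverlapSketch {
    public static void main(String[] args) {
        ObjectId myCommunity = new ObjectId();
        HashSet<String> shareIdStrs = new HashSet<String>();
        shareIdStrs.add(myCommunity.toString());

        // Custom-job style record: a list of community ids
        BasicDBList communityIds = new BasicDBList();
        communityIds.add(myCommunity);
        BasicDBObject returnVal = new BasicDBObject("communityIds", communityIds);

        boolean bFoundOverlap = false;
        BasicDBList communities = (BasicDBList) returnVal.get("communityIds");
        for (Object commIdObj : communities) {
            if (shareIdStrs.contains(((ObjectId) commIdObj).toString())) {
                bFoundOverlap = true;
                break;
            }
        }
        System.out.println("overlap=" + bFoundOverlap);
    }
}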
From source file:com.ikanow.infinit.e.application.utils.LogstashConfigUtils.java
License:Open Source License
public static String validateLogstashInput(String sourceKey, String config, StringBuffer errorMessage, boolean isAdmin) { if (null == _props) { _props = new PropertiesManager(); String allowedInputs = _props.getProperty("harvest.logstash.allowed_inputs"); if ((null == allowedInputs) || (allowedInputs.isEmpty())) { allowedInputs = "collectd,drupal_dblog,gelf,gemfire,imap,irc,lumberjack,s3,snmptrap,sqs,syslog,twitter,udp,xmpp,zenoss"; // currently *not* allowed by default: elasticsearch,eventlog,exec,file,ganglia,generator,graphite,heroku,jmx,log4j,pipe,puppet_facter,rabbitmq,redit,relp,sqlite,stdin,stomp,tcp,unix,varnishlog,websocket,wmi,zeromq }// ww w. j a v a 2s . c om _allowedInputs.addAll(Arrays.asList(allowedInputs.toLowerCase().split("\\s*,\\s*"))); String allowedFilters = _props.getProperty("harvest.logstash.allowed_filters"); if ((null == allowedFilters) || (allowedFilters.isEmpty())) { allowedFilters = "advisor,alter,anonymize,checksum,cidr,cipher,clone,collate,csv,date,dns,drop,elapsed,extractnumbers,fingerprint,geoip,gelfify,grep,grok,grokdiscovery,l18n,json,json_encode,kv,metaevent,metrics,multiline,mutate,noop,prune,punct,railsparallelrequest,range,sleep,split,sumnumbers,syslog_pri,throttle,translate,unique,urldecode,useragent,uuid,wms,wmts,xml"; // currently *not* allowed by default: elasticsearch,ruby,zeromq } _allowedFilters.addAll(Arrays.asList(allowedFilters.toLowerCase().split("\\s*,\\s*"))); } //TESTED (3_2a) // Configuration validation, phase 1 errorMessage.append("Validation error:"); BasicDBObject jsonifiedConfig = parseLogstashConfig(config, errorMessage); if (null == jsonifiedConfig) { return null; } errorMessage.setLength(0); // Configuration validation, phase 2 - very basic checks on the structure of the object Object input = jsonifiedConfig.get("input"); if ((null == input) || !(input instanceof BasicDBObject)) { // Does input exist? errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (0)"); return null; } //TESTED (3_1d) else { // Check there's only one input type and (unless admin) it's one of the allowed types BasicDBObject inputDbo = (BasicDBObject) input; if (1 != inputDbo.size()) { errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (1)"); return null; } //TESTED if (!isAdmin) { for (String key : inputDbo.keySet()) { if (!_allowedInputs.contains(key.toLowerCase())) { errorMessage.append("Security error, non-admin not allowed input type " + key + ", allowed options: " + _allowedInputs.toString()); return null; } //TESTED } } //TESTED (3_1abc) } Object filter = jsonifiedConfig.get("filter"); if ((null == filter) || !(filter instanceof BasicDBObject)) { // Does filter exist? errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. 
(2)"); return null; } //TESTED (3_2d) else { // Check there's only one input type and (unless admin) it's one of the allowed types if (!isAdmin) { BasicDBObject filterDbo = (BasicDBObject) filter; for (String key : filterDbo.keySet()) { if (!_allowedFilters.contains(key.toLowerCase())) { errorMessage.append("Security error, non-admin not allowed filter type " + key + ", allowed options: " + _allowedFilters.toString()); return null; } //TESTED } } //TESTED (3_2abc) } // Configuration validation, phase 3 Matcher m = null; m = _validationRegexInputReplace.matcher(config); if (!m.find()) { errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (3)"); return null; } //TESTED (see above) else { // If admin check on allowed types String inputType = m.group(2).toLowerCase(); // If it's a file-based plugin then replace sincedb_path (check that it's not used during the JSON-ification): if (inputType.equalsIgnoreCase("file") || inputType.equalsIgnoreCase("s3")) { config = _validationRegexInputReplace.matcher(config) .replaceFirst("$1\n sincedb_path => \"_XXX_DOTSINCEDB_XXX_\"\n"); } //TESTED } //TESTED m = _validationRegexNoSourceKey.matcher(config); // (this won't help malicious changes to source key, but will let people know they're not supposed to) if (m.find()) { errorMessage.append( "Not allowed to reference sourceKey - this is automatically appended by the logstash harvester"); return null; } //TESTED // OK now need to append the sourceKey at each stage of the pipeline to really really ensure that nobody sets sourceKey to be different m = _validationRegexAppendFields.matcher(config); StringBuffer newConfig = new StringBuffer(); if (m.find()) { m.appendReplacement(newConfig, "add_field => [ \"sourceKey\", \"" + sourceKey + "\"] \n\n" + m.group() + " \n if [sourceKey] == \"" + sourceKey + "\" { \n\n "); } else { errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (4)"); return null; } m.appendTail(newConfig); config = newConfig.toString(); config = config.replaceAll("}[^}]*$", ""); // (remove the last }) config += "\n\n mutate { update => [ \"sourceKey\", \"" + sourceKey + "\"] } \n}\n}\n"; // double check the sourceKey hasn't been overwritten and close the if from above //TESTED (syntactically correct and does overwrite sourceKey everywhere - success_2_2) return config; }
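A minimal sketch (illustrative section and plugin names) of the structural check above: get() returns Object, so the code tests instanceof BasicDBObject before casting, then inspects keySet() to enforce the whitelist of allowed input/filter types.

import com.mongodb.BasicDBObject;
import java.util.Arrays;
import java.util.HashSet;

public class ConfigSectionCheckSketch {
    public static void main(String[] args) {
        HashSet<String> allowedInputs = new HashSet<String>(Arrays.asList("s3", "syslog", "twitter"));

        BasicDBObject jsonifiedConfig = new BasicDBObject("input",
                new BasicDBObject("s3", new BasicDBObject("bucket", "logs")));

        Object input = jsonifiedConfig.get("input");       // may be absent or the wrong type
        if ((null == input) || !(input instanceof BasicDBObject)) {
            System.out.println("Invalid input format");
            return;
        }
        BasicDBObject inputDbo = (BasicDBObject) input;
        for (String key : inputDbo.keySet()) {
            if (!allowedInputs.contains(key.toLowerCase())) {
                System.out.println("Input type not allowed: " + key);
                return;
            }
        }
        System.out.println("Config OK");
    }
}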