List of usage examples for com.mongodb BasicDBObject toString
@SuppressWarnings("deprecation") public String toString()
Returns a JSON serialization of this object.
The output will look like: {"a":1, "b":["x","y","z"]}
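For quick reference, here is a minimal, self-contained sketch (assuming only the legacy com.mongodb driver classes BasicDBObject and BasicDBList on the classpath) that reproduces the documented output; the class name and the exact output formatting shown in the comments are illustrative, not taken from the driver documentation:

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class BasicDBObjectToStringExample {
    public static void main(String[] args) {
        // Build the equivalent of { "a" : 1, "b" : [ "x", "y", "z" ] }
        BasicDBList list = new BasicDBList();
        list.add("x");
        list.add("y");
        list.add("z");
        BasicDBObject obj = new BasicDBObject("a", 1).append("b", list);

        // toString() returns a JSON serialization of the object, e.g.
        // { "a" : 1 , "b" : [ "x" , "y" , "z"]}
        // (exact spacing depends on the driver's JSON serializer version)
        System.out.println(obj.toString());
    }
}

The @SuppressWarnings("deprecation") annotation on toString() reflects that, in recent 2.x/3.x drivers, the method delegates to the legacy com.mongodb.util.JSON serializer internally; if you are using the newer org.bson.Document API, Document.toJson() is the closest equivalent.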
From source file:com.ikanow.infinit.e.api.social.sharing.ShareHandler.java
License:Open Source License
private String getReferenceString(SharePojo share) {
    // FILE:
    if (null == share.getDocumentLocation().get_id()) { // local file based reference
        FileInputStream fin = null;
        Scanner s = null;
        try {
            File f = new File(share.getDocumentLocation().getCollection());
            fin = new FileInputStream(f);
            s = new Scanner(fin, "UTF-8");
            return (s.useDelimiter("\n").next());
        } catch (Exception e) {
            return null;
        } finally {
            try {
                if (null != fin)
                    fin.close();
                if (null != s)
                    s.close();
            } catch (Exception e) {
            } // (probably just never opened)
        }
    }
    // DB:
    // Carry on, this is a database object
    HashSet<String> shareIdStrs = new HashSet<String>();
    for (ShareCommunityPojo commIds : share.getCommunities()) {
        shareIdStrs.add(commIds.get_id().toString());
    }
    String retVal = null;
    BasicDBObject query = new BasicDBObject(DocumentPojo._id_, share.getDocumentLocation().get_id()); // (same for all artifacts)
    String dbName = share.getDocumentLocation().getDatabase();
    String collectionName = share.getDocumentLocation().getCollection();
    BasicDBObject returnVal = (BasicDBObject) MongoDbManager.getCollection(dbName, collectionName)
            .findOne(query);
    try {
        BasicDBList communities = null;
        boolean bCustomJob = dbName.equals("custommr"); // (a bit different)
        boolean bFoundOverlap = false;
        if (!bCustomJob) {
            ObjectId communityId = (ObjectId) returnVal.get(DocumentPojo.communityId_); // (same for other artifacts)
            bFoundOverlap = shareIdStrs.contains(communityId.toString());
        } else {
            communities = (BasicDBList) returnVal.get("communityIds"); // (shared across multiple json types)
            for (Object commIdObj : communities) {
                ObjectId commId = (ObjectId) commIdObj;
                if (shareIdStrs.contains(commId.toString())) {
                    bFoundOverlap = true;
                    break;
                }
            }
        }
        if (!bFoundOverlap) {
            throw new RuntimeException(""); // (turned into the common message below)
        }
        if (!bCustomJob) { // everything but custom jobs
            Date modifiedTime = returnVal.getDate(DocumentPojo.modified_); // (same for other artifacts)
            if (null != modifiedTime) {
                share.setModified(modifiedTime);
            }
            retVal = returnVal.toString();
        } else { // custom jobs
            String database = returnVal.getString(CustomMapReduceJobPojo.outputDatabase_);
            if (null == database) {
                database = dbName;
            }
            Date modifiedTime = returnVal.getDate(CustomMapReduceJobPojo.lastCompletionTime_);
            if (null != modifiedTime) {
                share.setModified(modifiedTime);
            }
            String collection = returnVal.getString(CustomMapReduceJobPojo.outputCollection_);
            BasicDBObject returnVal2 = (BasicDBObject) MongoDbManager.getCollection(database, collection)
                    .findOne();
            retVal = returnVal2.toString();
        }
    } catch (Exception e) {
        throw new RuntimeException("Document not found or permission issue (no overlapping communities)");
    }
    return retVal;
}
From source file:com.ikanow.infinit.e.application.handlers.polls.LogstashSourceDeletionPollHandler.java
License:Apache License
@Override public void performPoll() { boolean isSlave = false; if (null == LOGSTASH_CONFIG) { // (static memory not yet initialized) try {/* w ww . j a v a 2 s.com*/ Thread.sleep(1000); // (extend the sleep time a bit) } catch (Exception e) { } return; } File logstashDirectory = new File(LOGSTASH_CONFIG); String slaveHostname = null; if (!logstashDirectory.isDirectory() || !logstashDirectory.canRead() || !logstashDirectory.canWrite()) { logstashDirectory = new File(LOGSTASH_CONFIG_DISTRIBUTED); isSlave = true; if (!logstashDirectory.isDirectory() || !logstashDirectory.canRead() || !logstashDirectory.canWrite()) { try { Thread.sleep(10000); // (extend the sleep time a bit) } catch (Exception e) { } return; } try { slaveHostname = java.net.InetAddress.getLocalHost().getHostName(); } catch (Exception e) { // too complex if we don't have a hostname, just return return; } } // Deletion of distributed sources requires some co-ordination, we'll do it in master if (isSlave) { // register my existence BasicDBObject existence = new BasicDBObject("_id", slaveHostname); existence.put("ping", new Date()); DbManager.getIngest().getLogHarvesterSlaves().save(existence); } //TESTED (by hand) else { // MASTER: clear out old slaves // (if it hasn't pinged for more than 30 minutes) long now = new Date().getTime(); BasicDBObject deadSlaveQuery = new BasicDBObject("ping", new BasicDBObject(DbManager.lt_, new Date(now - 1000L * 1800L))); boolean found = false; DBCursor dbc = DbManager.getIngest().getLogHarvesterSlaves().find(deadSlaveQuery); while (dbc.hasNext()) { BasicDBObject deadSlave = (BasicDBObject) dbc.next(); found = true; String hostname = deadSlave.getString("_id"); if (null != hostname) { DbManager.getIngest().getLogHarvesterQ().remove(new BasicDBObject("forSlave", hostname)); _logger.info("Removing unresponsive slave host=" + hostname); } } if (found) { DbManager.getIngest().getLogHarvesterSlaves().remove(deadSlaveQuery); } } //TESTED (by hand) // Read delete elements from the Q... if (null == _logHarvesterQ) { _logHarvesterQ = new MongoQueue(DbManager.getIngest().getLogHarvesterQ().getDB().getName(), DbManager.getIngest().getLogHarvesterQ().getName()); } BasicDBObject queueQuery = new BasicDBObject("deleteOnlyCommunityId", new BasicDBObject(DbManager.exists_, true)); if (!isSlave) { // only get master messages queueQuery.put("forSlave", new BasicDBObject(DbManager.exists_, false)); } else { // only get messages intended for me queueQuery.put("forSlave", slaveHostname); } DBObject nextElement = _logHarvesterQ.pop(queueQuery); LinkedList<TestLogstashExtractorPojo> secondaryQueue = new LinkedList<TestLogstashExtractorPojo>(); LinkedList<String> deleteAfterRestartQueue = new LinkedList<String>(); boolean deletedSources = false; boolean deletedSinceDbs = false; while (nextElement != null) { //DEBUG //System.out.println("HOST: " + slaveHostname + ": RECEIVED: " + nextElement.toString() + " FROM " + queueQuery); _logger.info("host=" + slaveHostname + " received=" + nextElement.toString() + " from=" + queueQuery); TestLogstashExtractorPojo testInfo = TestLogstashExtractorPojo.fromDb(nextElement, TestLogstashExtractorPojo.class); if (null == testInfo.sourceKey) { continue; // need a sourceKey parameter... } if (!isSlave) { // slaves don't need to delete anything from the index, only files secondaryQueue.add(testInfo); } //(end if master) try { // First off - need to remove the conf file and restart logstash if we're actually deleting this... 
boolean deletedSource = false; if ((null == testInfo.deleteDocsOnly) || !testInfo.deleteDocsOnly) { // (default = delete entire source) deletedSources = true; deletedSource = true; String fileToDelete = new StringBuffer(LOGSTASH_CONFIG).append(testInfo._id.toString()) .append(LOGSTASH_CONFIG_EXTENSION).toString(); boolean deleted = false; try { deleted = new File(fileToDelete).delete(); } catch (Exception e) { } //DEBUG //System.out.println("DELETED CONF FILE" + fileToDelete + " ? " + deleted); _logger.info("delete conf_file=" + fileToDelete + " success=" + deleted); } //TESTED (docs-only + source deletion) // If _not_ deleting the source, then do delete the sincedb file // (else let it get cleaned up separately - minimizes race conditions where the source starts ingesting again) String fileToDelete = new StringBuffer(LOGSTASH_WD).append(".sincedb_") .append(testInfo._id.toString()).toString(); if (!deletedSource) { boolean deleted = false; try { deleted = new File(fileToDelete).delete(); deletedSinceDbs |= deleted; } catch (Exception e) { } //DEBUG //System.out.println("DELETED SINCEDB" + fileToDelete + " ? " + deletedSinceDb); _logger.info("primary delete sincedb_file=" + fileToDelete + " success=" + deleted); } else { deleteAfterRestartQueue.add(fileToDelete); } //TESTED (primary + secondary deletes) } catch (Exception e) { //e.printStackTrace(); } // probably just doesn't exist // Get next element and carry on nextElement = _logHarvesterQ.pop(queueQuery); } //TESTED (end first loop over elements to delete) if (deletedSources || deletedSinceDbs) { // this file actually existed - need to restart the logstash unfortunately _logger.info("Restarting logstash, and sleeping until logstash is restarted"); try { new File(LOGSTASH_RESTART_FILE).createNewFile(); for (int i = 0; i < 12; ++i) { Thread.sleep(10L * 1000L); if (!new File(LOGSTASH_RESTART_FILE).exists()) { Thread.sleep(5L * 1000L); // (extra wait for it to shut down) break; // (early exit) } } } catch (Exception e) { } } //TESTED (from doc deletion and from src deletion) for (String fileToDelete : deleteAfterRestartQueue) { boolean deleted = false; try { deleted = new File(fileToDelete).delete(); } catch (Exception e) { } //DEBUG //System.out.println("DELETED SINCEDB" + fileToDelete + " ? " + deletedSinceDb); _logger.info("secondary delete sincedb_file=" + fileToDelete + " success=" + deleted); } //TESTED (primary and secondary deletion) for (TestLogstashExtractorPojo testInfo : secondaryQueue) { String commIdStr = testInfo.deleteOnlyCommunityId.toString(); // Get all the indexes that might need to be cleansed: ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(DUMMY_INDEX); // Stashed index ArrayList<String> indices = new ArrayList<String>(); String stashedIndex = "recs_" + commIdStr; ClusterStateResponse retVal = indexMgr.getRawClient().admin().cluster().prepareState() .setIndices(stashedIndex).setRoutingTable(false).setNodes(false).setListenerThreaded(false) .get(); if (!retVal.getState().getMetaData().getIndices().isEmpty()) { indices.add(stashedIndex); } // (else doesn't exist...) 
// Live indexes: String indexPattern = new StringBuffer("recs_t_").append(commIdStr).append("*").toString(); retVal = indexMgr.getRawClient().admin().cluster().prepareState().setIndices(indexPattern) .setRoutingTable(false).setNodes(false).setListenerThreaded(false).get(); for (IndexMetaData indexMetadata : retVal.getState().getMetaData()) { //DEBUG //System.out.println("INDEX=" + indexMetadata.index()); indices.add(indexMetadata.index()); } deleteSourceKeyRecords(indexMgr, indices.toArray(new String[0]), testInfo.sourceKey); _logger.info("Deleted key=" + testInfo.sourceKey + " from indexes=" + ArrayUtils.toString(indices.toArray())); // Now I've deleted, go and distribute the deletion messages to the slaves if ((null != testInfo.distributed) && testInfo.distributed) { // Copy into the slaves' queue DBCursor dbc = DbManager.getIngest().getLogHarvesterSlaves().find(); while (dbc.hasNext()) { BasicDBObject slave = (BasicDBObject) dbc.next(); testInfo.forSlave = slave.getString("_id"); _logHarvesterQ.push(testInfo.toDb()); testInfo.forSlave = null; //DEBUG //System.out.println("DISTRIBUTING DELETION MESSAGE TO " + slave.toString()); _logger.info("distributing deletion message to host=" + slave.toString()); } } //TESTED (by hand) } //(end loop over secondary queue, ie to actually delete the indexes) }
From source file:com.ikanow.infinit.e.application.utils.LogstashConfigUtils.java
License:Open Source License
public static void main(String[] args) throws IOException { System.out.println(Arrays.toString(args)); Globals.setIdentity(com.ikanow.infinit.e.data_model.Globals.Identity.IDENTITY_API); Globals.overrideConfigLocation(args[0]); // 1) Errored sources - things that break the formatting StringBuffer errors = new StringBuffer(); String testName;//from w w w. ja v a2s .c o m // 1.1) {} mismatch 1 //a errors.setLength(0); testName = "error_1_1a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch (})")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //b errors.setLength(0); testName = "error_1_1b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch (})")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //c errors.setLength(0); testName = "error_1_1c"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch (})")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.2) {} mismatch 2 //a errors.setLength(0); testName = "error_1_2a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch ({)")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.3) multiple input/filter blocks // 1.3a) input errors.setLength(0); testName = "error_1_3a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Multiple input or filter blocks: input")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.3b) filter errors.setLength(0); testName = "error_1_3b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Multiple input or filter blocks: filter")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.4) unrecognized blocks // a output - special case errors.setLength(0); testName = "error_1_4a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString() .equals("Not allowed output blocks - these are appended automatically by the logstash harvester")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // b errors.setLength(0); testName = "error_1_4b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Unrecognized processing block: something_random")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.5) fields/sub-elements that are not permitted // a ... sincedb_path errors.setLength(0); testName = "error_1_5a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Not allowed sincedb_path in input.* block")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // b ... 
filter as sub-path of input errors.setLength(0); testName = "error_1_5b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Not allowed sub-elements of input called 'filter' (1)")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // c ... filter as sub-path of sub-element of input errors.setLength(0); testName = "error_1_5c"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Not allowed sub-elements of input called 'filter' (2)")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 2) Valid formatted source BasicDBObject retVal; String output; String inputName; // (for re-using config files across text) //2.1) errors.setLength(0); testName = "success_2_1"; if (null == (retVal = parseLogstashConfig(getTestFile(testName), errors))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } else if (!retVal.toString().equals( "{ \"input\" : { \"file\" : [ { \"path\" : { } , \"start_position\" : { } , \"type\" : { } , \"codec.multiline\" : { }}]} , \"filter\" : { \"csv\" : [ { \"columns\" : { }}] , \"drop\" : [ { }] , \"mutate\" : [ { \"convert\" : { }} , { \"add_fields\" : { }} , { \"rename\" : { }}] , \"date\" : [ { \"timezone\" : { } , \"match\" : { }}] , \"geoip\" : [ { \"source\" : { } , \"fields\" : { }}]}}")) { System.out.println("**** FAIL " + testName + ": " + retVal.toString()); } //System.out.println("(val="+retVal+")"); // 2.2 errors.setLength(0); testName = "success_2_2"; if (null == (retVal = parseLogstashConfig(getTestFile(testName), errors))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } if (null == MongoDbUtil.getProperty(retVal, "filter.geoip.fields")) { System.out.println("**** FAIL " + testName + ": " + retVal); } //System.out.println(retVal); //2.3) - check that the sincedb is added correctly, plus the sourceKey manipulation // (USE success_2_1 for this) errors.setLength(0); testName = "inputs_2_3"; inputName = "success_2_3"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": errored: " + errors); } else { String outputToTest = output.replaceAll("[\n\r]", "\\\\n").replaceAll("\\s+", " "); String testAgainst = "input {\n\n file {\n sincedb_path => \"_XXX_DOTSINCEDB_XXX_\"\n\n\n path => \"/root/odin-poc-data/proxy_logs/may_known_cnc.csv\"\n\n start_position => beginning\n\n type => \"proxy_logs\"\n\n codec => multiline {\n\n pattern => \"^%{YEAR}-%{MONTHNUM}-%{MONTHDAY}%{DATA:summary}\"\n\n negate => true\n\n what => \"previous\"\n\n } \n\n add_field => [ \"sourceKey\", \"inputs_2_3\"] \n\n}\n\n}\n\n\n\nfilter { \n if [sourceKey] == \"inputs_2_3\" { \n\n \n\n if [type] == \"proxy_logs\" {\n\n csv {\n\n columns => 
[\"Device_Name\",\"SimpleDate\",\"Event_#Date\",\"Source_IP\",\"Source_Port\",\"Destination_IP\",\"Destination_Port\",\"Protocol\",\"Vendor_Alert\",\"MSS_Action\",\"Logging_Device_IP\",\"Application\",\"Bytes_Received\",\"Bytes_Sent\",\"Dest._Country\",\"Message\",\"Message_Type\",\"MSS_Log_Source_IP\",\"MSS_Log_Source_Type\",\"MSS_Log_Source_UUID\",\"network_protocol_id\",\"OS_Type\",\"PIX_Main-Code\",\"PIX_Sub-Code\",\"Port\",\"Product_ID\",\"Product\",\"Rule\",\"Rule_Identifier\",\"Sensor_Name\",\"Class\",\"Translate_Destination_IP\",\"Translate_Destination_Port\",\"Translate_Source_IP\"]\n\n }\n\n if [Device_Name] == \"Device Name\" {\n\n drop {}\n\n }\n\n mutate {\n\n convert => [ \"Bytes_Received\", \"integer\" ]\n\n convert => [ \"Bytes_Sent\", \"integer\" ]\n\n }\n\n date {\n\n timezone => \"Europe/London\"\n\n match => [ \"Event_Date\" , \"yyyy-MM-dd'T'HH:mm:ss\" ]\n\n }\n\n geoip {\n\n source => \"Destination_IP\"\n\n fields => [\"timezone\",\"location\",\"latitude\",\"longitude\"]\n\n }\n\n }\n\n\n\n mutate { update => [ \"sourceKey\", \"inputs_2_3\"] } \n}\n}\n"; testAgainst = testAgainst.replaceAll("[\n\r]", "\\\\n").replaceAll("\\s+", " "); if (!outputToTest.equals(testAgainst)) { System.out.println("**** FAIL " + testName + ": " + output); } } // 3) Valid formatted source, access to restricted types // 3.1) input // a) restricted - admin // (USE success_2_1 for this) errors.setLength(0); testName = "inputs_3_1a"; inputName = "success_2_1"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString() .startsWith("Security error, non-admin not allowed input type file, allowed options: ")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // b) restricted - non admin // (USE success_2_1 for this) errors.setLength(0); testName = "inputs_3_1b"; inputName = "success_2_1"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // c) unrestricted - non admin errors.setLength(0); testName = "inputs_3_1c"; inputName = "inputs_3_1c"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //System.out.println("(val="+output+")"); // d) no input at all errors.setLength(0); testName = "inputs_3_1d"; inputName = "inputs_3_1d"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString().startsWith( "Invalid input format, should be 'input { INPUT_TYPE { ... 
} }' (only one INPUT_TYPE) and also contain a filter, no \"s around them.")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 3.2) filter // a) restricted - admin errors.setLength(0); testName = "filters_3_2a"; inputName = "filters_3_2a"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString() .startsWith("Security error, non-admin not allowed filter type elasticsearch, allowed options: ")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //System.out.println("(err="+errors.toString()+")"); // b) restricted - non admin // (USE filters_3_2a for this) errors.setLength(0); testName = "filters_3_2a"; inputName = "filters_3_2a"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //System.out.println("(val="+output+")"); // c) unrestricted - non admin // (implicitly tested via 3.1bc) // d) no filter at all errors.setLength(0); testName = "filters_3_2d"; inputName = "filters_3_2d"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString().startsWith( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them.")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } }
From source file:com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java
License:Open Source License
private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable, String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer, String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue, String arguments) throws IOException { String dbserver = prop_general.getDatabaseServer(); output = outputDatabase + "." + tempOutputCollection; int nSplits = 8; int nDocsPerSplit = 12500; //add communities to query if this is not a custom table if (!isCustomTable) { // Start with the old query: BasicDBObject oldQueryObj = null; if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else {/* ww w . j a va 2 s . c om*/ oldQueryObj = new BasicDBObject(); } // Community Ids aren't indexed in the metadata collection, but source keys are, so we need to transform to that BasicDBObject keyQuery = new BasicDBObject(SourcePojo.communityIds_, new BasicDBObject(DbManager.in_, communityIds)); boolean bAdminOverride = false; if (oldQueryObj.containsField("admin")) { // For testing only... if (1 == communityIds.size()) { ObjectId communityId = communityIds.get(0); if (RESTTools.adminLookup(communityId.toString())) { bAdminOverride = true; if (oldQueryObj.containsField("max.splits")) { nSplits = oldQueryObj.getInt("max.splits"); } if (oldQueryObj.containsField("max.docs.per.split")) { nDocsPerSplit = oldQueryObj.getInt("max.docs.per.split"); } } } } //(end diagnostic/benchmarking/test code for admins only part 1) if (bAdminOverride) { oldQueryObj = (BasicDBObject) oldQueryObj.get("admin"); //(end diagnostic/benchmarking/test code for admins only part 2) } else if (oldQueryObj.containsField(DocumentPojo.sourceKey_) || input.startsWith("feature.")) { // Source Key specified by user, stick communityIds check in for security oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds)); } else { // Source key not specified by user, transform communities->sourcekeys BasicDBObject keyFields = new BasicDBObject(SourcePojo.key_, 1); DBCursor dbc = MongoDbManager.getIngest().getSource().find(keyQuery, keyFields); if (dbc.count() > 500) { // (too many source keys let's keep the query size sensible...) oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds)); } else { HashSet<String> sourceKeys = new HashSet<String>(); while (dbc.hasNext()) { DBObject dbo = dbc.next(); String sourceKey = (String) dbo.get(SourcePojo.key_); if (null != sourceKey) { sourceKeys.add(sourceKey); } } if (sourceKeys.isEmpty()) { // query returns empty throw new RuntimeException("Communities contain no sources"); } BasicDBObject newQueryClauseObj = new BasicDBObject(DbManager.in_, sourceKeys); // Now combine the queries... oldQueryObj.put(DocumentPojo.sourceKey_, newQueryClauseObj); } // (end if too many source keys across the communities) } //(end if need to break source keys down into communities) query = oldQueryObj.toString(); } else { //get the custom table (and database) input = getCustomDbAndCollection(input); } if (arguments == null) arguments = ""; // Generic configuration out.write("<?xml version=\"1.0\"?>\n<configuration>"); // Mongo specific configuration out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title + "</value></property>" + "\n\t<property><!-- run the job verbosely ? 
--><name>mongo.job.verbose</name><value>true</value></property>" + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>" + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://" + dbserver + "/" + input + "</value></property>" + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://" + dbserver + "/" + output + "</value> </property>" + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>" + query + "</value></property>" + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>" + ((fields == null) ? ("") : fields) + "</value></property>" + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>" + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>0</value><!-- 0 == no limit --></property>" + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? --><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>" + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper + "</value></property>" + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer + "</value></property>" + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>" + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.mongodb.hadoop.MongoOutputFormat</value></property>" + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>" + outputKey + "</value></property>" + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>" + outputValue + "</value></property>" + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value></value></property>" + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value></value></property>" + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>" + combiner + "</value></property>" + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>" + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>" + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>"); // Infinit.e specific configuration out.write("\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>" + StringEscapeUtils.escapeXml(arguments) + "</value></property>" + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>" + nSplits + "</value></property>" + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>" + nDocsPerSplit + "</value></property>"); // Closing thoughts: out.write("\n</configuration>"); out.flush(); out.close(); }
From source file:com.ikanow.infinit.e.data_model.api.ResponsePojo.java
License:Apache License
public static ResponsePojo fromDb(BasicDBObject bson) {
    BasicDBObject bson2 = new BasicDBObject();
    bson2.put("stats", bson.get("stats"));
    bson2.put("response", bson.get("response"));
    ResponsePojo rp = ResponsePojo.fromApi(bson2.toString(), ResponsePojo.class);

    // Now all the elements!
    Object evtTimeline = null, facets = null, times = null, entities = null, events = null, facts = null,
            summaries = null, sources = null, sourceMetaTags = null, sourceMetaTypes = null, moments = null,
            other = null;
    evtTimeline = bson.get("eventsTimeline");
    facets = bson.get("facets");
    times = bson.get("times");
    entities = bson.get("entities");
    events = bson.get("events");
    facts = bson.get("facts");
    summaries = bson.get("summaries");
    sources = bson.get("sources");
    sourceMetaTags = bson.get("sourceMetatags");
    sourceMetaTypes = bson.get("sourceMetaTypes");
    moments = bson.get("moments");
    other = bson.get("other");

    rp.setEventsTimeline(evtTimeline);
    rp.setFacets(facets);
    rp.setTimes(times, rp.getTimeInterval() == null ? 0 : rp.getTimeInterval());
    rp.setEntities(entities);
    rp.setEvents(events);
    rp.setFacts(facts);
    rp.setSummaries(summaries);
    rp.setSources(sources);
    rp.setSourceMetaTags(sourceMetaTags);
    rp.setSourceMetaTypes(sourceMetaTypes);
    rp.setMoments(moments, rp.getMomentInterval());
    rp.setOther(other);

    // The main data object is discarded in the original fromApi() call, so put it back now
    Object docData = bson.get("data");
    if (null != docData) {
        rp.setData((BasicDBList) docData, (BasePojoApiMap<BasicDBList>) null);
    } else {
        // (ensure there's always an empty list)
        rp.setData(new ArrayList<BasicDBObject>(0), (BasePojoApiMap<BasicDBObject>) null);
    }
    return rp;
}
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) {
    final Configuration hadoopConfiguration = context.getConfiguration();
    final InfiniteMongoConfig conf = new InfiniteMongoConfig(hadoopConfiguration);
    List<InputSplit> splits = InfiniteMongoSplitter.calculateSplits(conf);
    if (conf.getSelfMerge() != null) {
        // check if we need to grab existing records and add them to the splits
        final Configuration existingConfiguration = context.getConfiguration();
        existingConfiguration.set("mongo.input.uri", conf.getSelfMerge());
        BasicDBObject query = new BasicDBObject();
        // add on this query to only get items previous to now if no reducer is specified (otherwise
        // we will leak any items we map on the first run back in before this split runs)
        if (context.getNumReduceTasks() == 0)
            query.put("_id", new BasicDBObject(MongoDbManager.lt_, new ObjectId()));
        existingConfiguration.set("mongo.input.query", query.toString());
        final InfiniteMongoConfig existingConf = new InfiniteMongoConfig(existingConfiguration);
        splits.addAll(InfiniteMongoSplitter.calculateSplits(existingConf));
    }
    return splits;
}
From source file:com.ikanow.infinit.e.data_model.driver.InfiniteDriver.java
License:Apache License
public Set<String> updateAliases(Collection<EntityFeaturePojo> aliasesToUpdate, String communityIdStr,
        boolean bUpsert, Map<String, List<SharePojo>> aliasMapping, ResponseObject response) {
    if (null == aliasMapping) {
        aliasMapping = new HashMap<String, List<SharePojo>>();
        this.getAliases(communityIdStr, aliasMapping, response);
        if (!response.isSuccess()) {
            return null;
        }
    } //TESTED
    Map<ObjectId, BasicDBObject> shareContentCache = new HashMap<ObjectId, BasicDBObject>();
    List<SharePojo> sharesToUpdate = new LinkedList<SharePojo>();

    // Step through the aliases, update the content
    // Loop 1 update
    SharePojo shareForNewAliases = null;
    Set<String> erroredAliases = new HashSet<String>();
    HashMultimap<ObjectId, String> shareToAliasMapping = HashMultimap.create();
    for (EntityFeaturePojo alias : aliasesToUpdate) {
        List<SharePojo> sharesForThisAlias = aliasMapping.get(alias.getIndex());
        if ((null == sharesForThisAlias) && bUpsert) { // This is a new alias and not ignoring upserts
            if (null == shareForNewAliases) { // Haven't yet assigned such a share
                shareForNewAliases = this.upsertSharePrep(communityIdStr, shareContentCache, aliasMapping);
                if (null == shareForNewAliases) {
                    erroredAliases.add(alias.getIndex());
                    continue;
                }
                sharesToUpdate.add(shareForNewAliases);
            }
            BasicDBObject shareContent = shareContentCache.get(shareForNewAliases.get_id()); // (exists by construction)
            shareContent.put(alias.getIndex(), alias.toDb());
            shareToAliasMapping.put(shareForNewAliases.get_id(), alias.getIndex());
        } //TESTED
        else if (null != sharesForThisAlias) {
            for (SharePojo share : sharesForThisAlias) {
                BasicDBObject shareContent = shareContentCache.get(share.get_id());
                if (null == shareContent) {
                    try {
                        String json = share.getShare();
                        shareContent = (BasicDBObject) JSON.parse(json);
                        shareContentCache.put(share.get_id(), shareContent);
                        sharesToUpdate.add(share);
                    } catch (Exception e) {
                        erroredAliases.add(alias.getIndex());
                    }
                } //TESTED
                shareContent.put(alias.getIndex(), alias.toDb());
                shareToAliasMapping.put(share.get_id(), alias.getIndex());
            } //TESTED
        } else {
            erroredAliases.add(alias.getIndex());
        }
        // end loop over updating shares
    } //end loop over aliases

    // Loop 2 now update all the shares
    boolean bSucceededUpdatingSomething = false;
    for (SharePojo share : sharesToUpdate) {
        BasicDBObject shareContent = shareContentCache.get(share.get_id()); // (exists by construction)
        String shareIdStr = share.get_id().toString();
        this.updateShareJSON(shareIdStr, share.getTitle(), share.getDescription(), "infinite-entity-alias",
                shareContent.toString(), response);
        bSucceededUpdatingSomething |= response.isSuccess();
        if (!response.isSuccess()) {
            Set<String> failedAliases = shareToAliasMapping.get(share.get_id());
            if (null != failedAliases) {
                erroredAliases.addAll(failedAliases);
            }
        }
    } //TESTED
    response.setSuccess(bSucceededUpdatingSomething);
    return erroredAliases;
}
From source file:com.ikanow.infinit.e.data_model.driver.InfiniteDriver.java
License:Apache License
public Set<String> removeAliases(Collection<String> aliasesToRemove, String communityIdStr,
        Map<String, List<SharePojo>> aliasMapping, ResponseObject response) {
    if (null == aliasMapping) {
        aliasMapping = new HashMap<String, List<SharePojo>>();
        this.getAliases(communityIdStr, aliasMapping, response);
        if (!response.isSuccess()) {
            return null;
        }
    } //TESTED
    Map<ObjectId, BasicDBObject> shareContentCache = new HashMap<ObjectId, BasicDBObject>();
    List<SharePojo> sharesToUpdate = new LinkedList<SharePojo>();

    // Step through the aliases, update the content
    // Loop 1 update
    Set<String> erroredAliases = new HashSet<String>();
    HashMultimap<ObjectId, String> shareToAliasMapping = HashMultimap.create();
    for (String alias : aliasesToRemove) {
        List<SharePojo> sharesForThisAlias = aliasMapping.get(alias);
        if (null != sharesForThisAlias) {
            for (SharePojo share : sharesForThisAlias) {
                BasicDBObject shareContent = shareContentCache.get(share.get_id());
                if (null == shareContent) {
                    try {
                        String json = share.getShare();
                        shareContent = (BasicDBObject) JSON.parse(json);
                        shareContentCache.put(share.get_id(), shareContent);
                        sharesToUpdate.add(share);
                    } catch (Exception e) {
                        erroredAliases.add(alias);
                    }
                } //TESTED
                shareContent.remove(alias);
                shareToAliasMapping.put(share.get_id(), alias);
            } //TESTED
        }
        // end loop over updating shares
    } //end loop over aliases

    // Loop 2 now update all the shares
    boolean bSucceededUpdatingSomething = false;
    for (SharePojo share : sharesToUpdate) {
        BasicDBObject shareContent = shareContentCache.get(share.get_id()); // (exists by construction)
        String shareIdStr = share.get_id().toString();
        if (shareContent.isEmpty()) { // Remove the share
            this.removeShare(shareIdStr, response);
            if (!response.isSuccess()) {
                Set<String> failedAliases = shareToAliasMapping.get(share.get_id());
                if (null != failedAliases) {
                    erroredAliases.addAll(failedAliases);
                }
            }
        } //TESTED
        else {
            this.updateShareJSON(shareIdStr, share.getTitle(), share.getDescription(), "infinite-entity-alias",
                    shareContent.toString(), response);
            bSucceededUpdatingSomething |= response.isSuccess();
            if (!response.isSuccess()) {
                Set<String> failedAliases = shareToAliasMapping.get(share.get_id());
                if (null != failedAliases) {
                    erroredAliases.addAll(failedAliases);
                }
            }
        } //TESTED
    } //TESTED
    response.setSuccess(bSucceededUpdatingSomething);
    return erroredAliases;
}
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License:Open Source License
private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable, String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer, String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue, String arguments, Boolean incrementalMode, ObjectId userId, Boolean selfMerge, String originalOutputCollection, Boolean appendResults) throws IOException { String dbserver = prop_general.getDatabaseServer(); output = outputDatabase + "." + tempOutputCollection; boolean isAdmin = AuthUtils.isAdmin(userId); int nSplits = 8; int nDocsPerSplit = 12500; //add communities to query if this is not a custom table BasicDBObject oldQueryObj = null; BasicDBObject srcTags = null;/*from w w w. ja v a 2 s . c om*/ // Start with the old query: if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else { oldQueryObj = new BasicDBObject(); } boolean elasticsearchQuery = oldQueryObj.containsField("qt") && !isCustomTable; int nLimit = 0; if (oldQueryObj.containsField("$limit")) { nLimit = oldQueryObj.getInt("$limit"); oldQueryObj.remove("$limit"); } if (oldQueryObj.containsField("$splits")) { nSplits = oldQueryObj.getInt("$splits"); oldQueryObj.remove("$splits"); } if (oldQueryObj.containsField("$srctags")) { srcTags = new BasicDBObject(SourcePojo.tags_, oldQueryObj.get("$srctags")); oldQueryObj.remove("$srctags"); } if (bLocalMode) { // If in local mode, then set this to a large number so we always run inside our limit/split version // (since for some reason MongoInputFormat seems to fail on large collections) nSplits = InfiniteMongoSplitter.MAX_SPLITS; } if (oldQueryObj.containsField("$docsPerSplit")) { nDocsPerSplit = oldQueryObj.getInt("$docsPerSplit"); oldQueryObj.remove("$docsPerSplit"); } oldQueryObj.remove("$fields"); oldQueryObj.remove("$output"); oldQueryObj.remove("$reducers"); String mapperKeyClass = oldQueryObj.getString("$mapper_key_class", ""); String mapperValueClass = oldQueryObj.getString("$mapper_value_class", ""); oldQueryObj.remove("$mapper_key_class"); oldQueryObj.remove("$mapper_value_class"); String cacheList = null; Object cacheObj = oldQueryObj.get("$caches"); if (null != cacheObj) { cacheList = cacheObj.toString(); // (either array of strings, or single string) if (!cacheList.startsWith("[")) { cacheList = "[" + cacheList + "]"; // ("must" now be valid array) } oldQueryObj.remove("$caches"); } //TESTED if (null != nDebugLimit) { // (debug mode override) nLimit = nDebugLimit; } boolean tmpIncMode = (null != incrementalMode) && incrementalMode; Date fromOverride = null; Date toOverride = null; Object fromOverrideObj = oldQueryObj.remove("$tmin"); Object toOverrideObj = oldQueryObj.remove("$tmax"); if (null != fromOverrideObj) { fromOverride = InfiniteHadoopUtils.dateStringFromObject(fromOverrideObj, true); } if (null != toOverrideObj) { toOverride = InfiniteHadoopUtils.dateStringFromObject(toOverrideObj, false); } if (!isCustomTable) { if (elasticsearchQuery) { oldQueryObj.put("communityIds", communityIds); //tmin/tmax not supported - already have that capability as part of the query } else { if (input.equals("feature.temporal")) { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("value.maxTime", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, true)); } //TESTED oldQueryObj.put("_id.c", new BasicDBObject(DbManager.in_, communityIds)); } else { oldQueryObj.put(DocumentPojo.communityId_, new 
BasicDBObject(DbManager.in_, communityIds)); if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED if (input.equals("doc_metadata.metadata")) { oldQueryObj.put(DocumentPojo.index_, new BasicDBObject(DbManager.ne_, "?DEL?")); // (ensures not soft-deleted) } } } } else { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED //get the custom table (and database) input = CustomOutputManager.getCustomDbAndCollection(input); } query = oldQueryObj.toString(); if (arguments == null) arguments = ""; // Generic configuration out.write("<?xml version=\"1.0\"?>\n<configuration>"); // Mongo specific configuration out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title + "</value></property>" + "\n\t<property><!-- run the job verbosely ? --><name>mongo.job.verbose</name><value>true</value></property>" + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>" + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://" + dbserver + "/" + input + "</value></property>" + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://" + dbserver + "/" + output + "</value> </property>" + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>" + StringEscapeUtils.escapeXml(query) + "</value></property>" + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>" + ((fields == null) ? ("") : fields) + "</value></property>" + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>" + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>" + nLimit + "</value><!-- 0 == no limit --></property>" + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? 
--><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>" + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper + "</value></property>" + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer + "</value></property>" + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>" + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat</value></property>" + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>" + outputKey + "</value></property>" + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>" + outputValue + "</value></property>" + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value>" + mapperKeyClass + "</value></property>" + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value>" + mapperValueClass + "</value></property>" + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>" + combiner + "</value></property>" + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>" + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>" + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>"); // Infinit.e specific configuration out.write("\n\t<property><!-- User Arguments [optional] --><name>infinit.e.userid</name><value>" + StringEscapeUtils.escapeXml(userId.toString()) + "</value></property>" + "\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>" + StringEscapeUtils.escapeXml(arguments) + "</value></property>" + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>" + nSplits + "</value></property>" + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>" + nDocsPerSplit + "</value></property>" + "\n\t<property><!-- Infinit.e incremental mode [optional] --><name>update.incremental</name><value>" + tmpIncMode + "</value></property>" + "\n\t<property><!-- Infinit.e quick admin check [optional] --><name>infinit.e.is.admin</name><value>" + isAdmin + "</value></property>" + "\n\t<property><!-- Infinit.e userid [optional] --><name>infinit.e.userid</name><value>" + userId + "</value></property>"); if (null != cacheList) { out.write( "\n\t<property><!-- Infinit.e cache list [optional] --><name>infinit.e.cache.list</name><value>" + cacheList + "</value></property>"); } //TESTED if (null != srcTags) { out.write( "\n\t<property><!-- Infinit.e src tags filter [optional] --><name>infinit.e.source.tags.filter</name><value>" + srcTags.toString() + "</value></property>"); } if (null != selfMerge && selfMerge && originalOutputCollection != null) { originalOutputCollection = "mongodb://" + dbserver + "/" + outputDatabase + "." 
+ originalOutputCollection; out.write( "\n\t<property><!-- This jobs output collection for passing into the mapper along with input collection [optional] --><name>infinit.e.selfMerge</name><value>" + originalOutputCollection + "</value></property>"); } // Closing thoughts: out.write("\n</configuration>"); out.flush(); out.close(); }
From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteElasticsearchHadoopUtils.java
License:Apache License
public static void handleElasticsearchInput(CustomMapReduceJobPojo job, Configuration config, BasicDBObject advancedConfigurationDbo) { // Pull out type list: Object o = advancedConfigurationDbo.remove("$types"); String[] types = null;// w ww . j a va2 s. c o m if (null != o) { if (o instanceof BasicDBList) { types = ((BasicDBList) o).toArray(new String[0]); } else if (o instanceof String) { types = ((String) o).split("\\s*,\\s*"); } } //TESTED (by hand) //QUERY: // Date override: Date fromOverride = null; Date toOverride = null; Object fromOverrideObj = advancedConfigurationDbo.remove("$tmin"); Object toOverrideObj = advancedConfigurationDbo.remove("$tmax"); if (null != fromOverrideObj) { fromOverride = InfiniteHadoopUtils.dateStringFromObject(fromOverrideObj, true); } if (null != toOverrideObj) { toOverride = InfiniteHadoopUtils.dateStringFromObject(toOverrideObj, false); } Boolean streaming = null; Object streamingObj = advancedConfigurationDbo.remove("$streaming"); if (streamingObj instanceof Boolean) { streaming = (Boolean) streamingObj; } //DEBUG //System.out.println("QUERY = " + advancedConfigurationDbo.toString()); BasicDBObject newQuery = new BasicDBObject(); Object queryObj = advancedConfigurationDbo.get("query"); if (queryObj instanceof String) { config.set("es.query", queryObj.toString()); // URL version) if ((null != fromOverride) || (null != toOverride)) { throw new RuntimeException( "Can't specify $tmin/$tmax shortcut in conjunction with 'URL' query type"); } //TESTED } else if (null != queryObj) { newQuery.put("query", queryObj); Object filterObj = advancedConfigurationDbo.get("filter"); if (null != filterObj) newQuery.put("filter", filterObj); // (doesn't matter if it doesn't exist) Object fieldsObj = advancedConfigurationDbo.get("fields"); if (null != fieldsObj) newQuery.put("fields", fieldsObj); // (doesn't matter if it doesn't exist) Object sizeObj = advancedConfigurationDbo.get("size"); if (null != sizeObj) newQuery.put("size", sizeObj); // (doesn't matter if it doesn't exist) if ((null != fromOverride) || (null != toOverride)) { if (null == filterObj) { BasicDBObject filterRangeParamsDbo = new BasicDBObject(); if (null != fromOverride) { filterRangeParamsDbo.put("gte", fromOverride.getTime()); } if (null != toOverride) { filterRangeParamsDbo.put("lte", toOverride.getTime()); } BasicDBObject filterRangeDbo = new BasicDBObject("@timestamp", filterRangeParamsDbo); BasicDBObject filterDbo = new BasicDBObject("range", filterRangeDbo); newQuery.put("filter", filterDbo); } else { // combine filter throw new RuntimeException( "Can't (currently) specify $tmin/$tmax shortcut in conjunction with filter"); } //TESTED } config.set("es.query", newQuery.toString()); } //(else no query == match all) //COMMUNITIES Pattern dateRegex = null; ThreadSafeSimpleDateFormat tssdf = null; if ((null != fromOverride) || (null != toOverride)) { dateRegex = Pattern.compile("[0-9]{4}[.][0-9]{2}[.][0-9]{2}"); tssdf = new ThreadSafeSimpleDateFormat("yyyy.MM.dd"); } //TESTED StringBuffer overallIndexNames = new StringBuffer(); for (ObjectId commId : job.communityIds) { StringBuffer indexNames = new StringBuffer(); //TODO (INF-2641): need to handle: //c) anyway to sub-query?! (look for communityIds term?!) 
if (null == streaming) { indexNames.append("recs_*").append(commId.toString()).append("*"); } else if (streaming) { indexNames.append("recs_t_").append(commId.toString()).append("*"); } else {// !streaming indexNames.append("recs_").append(commId.toString()); } //TESTED StringBuffer decomposedIndexes = new StringBuffer(); boolean needDecomposedIndexes = false; HashSet<String> typesAdded = new HashSet<String>(); if ((null != types) && (null == fromOverride) && (null == toOverride)) { // (types manual, no date filtering - can be much simpler) for (String s : types) typesAdded.add(s); } else { // (All this oddly written code is to minimize the number of es types that get exposed, because // they are really badly behaved in terms of bw compatbility) if (null != types) { for (String s : types) typesAdded.add(s); } ElasticSearchManager indexMgr = ElasticSearchManager.getIndex("doc_dummy"); // (index guaranteed to exist) Object[] indexMetaObj = indexMgr.getRawClient().admin().cluster().prepareState() .setIndices(indexNames.toString()).setRoutingTable(false).setNodes(false) .setListenerThreaded(false).get().getState().getMetaData().getIndices().values().toArray(); if (null != indexMetaObj) for (Object oo : indexMetaObj) { IndexMetaData indexMeta = (IndexMetaData) oo; String indexName = indexMeta.getIndex(); if ((null != fromOverride) || (null != toOverride)) { //DEBUG //System.out.println("INDEX: " + indexName); Matcher m = dateRegex.matcher(indexName); if (m.find()) { try { Date d = tssdf.parse(m.group()); long endpoint = d.getTime() + 24L * 3600L * 1000L - 1; //DEBUG //System.out.println("***************** COMPARE: " + d + " FROM " + fromOverride + " TO " + toOverride + "..errr . " + m.group()); if (null != fromOverride) { if (endpoint < fromOverride.getTime()) { // no overlap on the left needDecomposedIndexes = true; continue; } } //TESTED if (null != toOverride) { if (d.getTime() > toOverride.getTime()) { // no overlap on the right needDecomposedIndexes = true; continue; } } //TESTED } catch (ParseException e) { // just carry on, odd index name, it happens needDecomposedIndexes = true; continue; } } } //TESTED (end loop over time checking) if (null == types) { Iterator<String> typesIt = indexMeta.getMappings().keysIt(); while (typesIt.hasNext()) { String type = typesIt.next(); if (!type.equals("_default_")) { typesAdded.add(type); } } } if (0 != decomposedIndexes.length()) { decomposedIndexes.append(','); } decomposedIndexes.append(indexName); } //(end loop over indexes) } //(end if need to derive the types from the indexes) if (needDecomposedIndexes) { // (because we filtered some indexes out) indexNames = decomposedIndexes; } if (0 == indexNames.length()) { continue; // nothing to do here... 
} int numTypesAdded = 0; if (typesAdded.isEmpty()) { // there doesn't seem to be any types associated with this set of indexes continue; // (ie don't add) } else for (String type : typesAdded) { if (numTypesAdded > 0) { indexNames.append(","); } else { indexNames.append("/"); } numTypesAdded++; indexNames.append(type); } if (overallIndexNames.length() > 0) { overallIndexNames.append(",,"); } overallIndexNames.append(indexNames); } //(end loop over community) //TESTED (by hand) if (0 == overallIndexNames.length()) { throw new RuntimeException( "Communities contained no types, either all indexes empty, or index is corrupt"); } //TESTED (by hand) //DEBUG //System.out.println("INDEXES = " + overallIndexNames.toString()); config.set("es.resource", overallIndexNames.toString()); config.set("es.index.read.missing.as.empty", "yes"); //proxy if running in debug mode: if (InfiniteEsInputFormat.LOCAL_DEBUG_MODE) { config.set("es.net.proxy.http.host", "localhost"); config.set("es.net.proxy.http.port", "8888"); } //TESTED (by hand) }