List of usage examples for com.mongodb BasicDBObject toString
@SuppressWarnings("deprecation") public String toString()
Returns a JSON serialization of this object.
The output will look like: {"a":1, "b":["x","y","z"]}
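For quick reference, here is a minimal, self-contained sketch (assuming only the legacy com.mongodb driver classes BasicDBObject and BasicDBList on the classpath) that reproduces the documented output; the class name and the exact output formatting shown in the comments are illustrative, not taken from the driver documentation:

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class BasicDBObjectToStringExample {
    public static void main(String[] args) {
        // Build the equivalent of { "a" : 1, "b" : [ "x", "y", "z" ] }
        BasicDBList list = new BasicDBList();
        list.add("x");
        list.add("y");
        list.add("z");
        BasicDBObject obj = new BasicDBObject("a", 1).append("b", list);

        // toString() returns a JSON serialization of the object, e.g.
        // { "a" : 1 , "b" : [ "x" , "y" , "z"]}
        // (exact spacing depends on the driver's JSON serializer version)
        System.out.println(obj.toString());
    }
}

The @SuppressWarnings("deprecation") annotation on toString() reflects that, in recent 2.x/3.x drivers, the method delegates to the legacy com.mongodb.util.JSON serializer internally; if you are using the newer org.bson.Document API, Document.toJson() is the closest equivalent.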
From source file:com.ikanow.infinit.e.api.social.sharing.ShareHandler.java
License:Open Source License
private String getReferenceString(SharePojo share) {
    // FILE:
    if (null == share.getDocumentLocation().get_id()) { // local file based reference
        FileInputStream fin = null;
        Scanner s = null;
        try {
            File f = new File(share.getDocumentLocation().getCollection());
            fin = new FileInputStream(f);
            s = new Scanner(fin, "UTF-8");
            return (s.useDelimiter("\n").next());
        } catch (Exception e) {
            return null;
        } finally {
            try {
                if (null != fin)
                    fin.close();
                if (null != s)
                    s.close();
            } catch (Exception e) {
            } // (probably just never opened)
        }
    }
    // DB:
    // Carry on, this is a database object
    HashSet<String> shareIdStrs = new HashSet<String>();
    for (ShareCommunityPojo commIds : share.getCommunities()) {
        shareIdStrs.add(commIds.get_id().toString());
    }
    String retVal = null;
    BasicDBObject query = new BasicDBObject(DocumentPojo._id_, share.getDocumentLocation().get_id()); // (same for all artifacts)
    String dbName = share.getDocumentLocation().getDatabase();
    String collectionName = share.getDocumentLocation().getCollection();
    BasicDBObject returnVal = (BasicDBObject) MongoDbManager.getCollection(dbName, collectionName)
            .findOne(query);
    try {
        BasicDBList communities = null;
        boolean bCustomJob = dbName.equals("custommr"); // (a bit different)
        boolean bFoundOverlap = false;
        if (!bCustomJob) {
            ObjectId communityId = (ObjectId) returnVal.get(DocumentPojo.communityId_); // (same for other artifacts)
            bFoundOverlap = shareIdStrs.contains(communityId.toString());
        } else {
            communities = (BasicDBList) returnVal.get("communityIds"); // (shared across multiple json types)
            for (Object commIdObj : communities) {
                ObjectId commId = (ObjectId) commIdObj;
                if (shareIdStrs.contains(commId.toString())) {
                    bFoundOverlap = true;
                    break;
                }
            }
        }
        if (!bFoundOverlap) {
            throw new RuntimeException(""); // (turned into the common message below)
        }
        if (!bCustomJob) { // everything but custom jobs
            Date modifiedTime = returnVal.getDate(DocumentPojo.modified_); // (same for other artifacts)
            if (null != modifiedTime) {
                share.setModified(modifiedTime);
            }
            retVal = returnVal.toString();
        } else { // custom jobs
            String database = returnVal.getString(CustomMapReduceJobPojo.outputDatabase_);
            if (null == database) {
                database = dbName;
            }
            Date modifiedTime = returnVal.getDate(CustomMapReduceJobPojo.lastCompletionTime_);
            if (null != modifiedTime) {
                share.setModified(modifiedTime);
            }
            String collection = returnVal.getString(CustomMapReduceJobPojo.outputCollection_);
            BasicDBObject returnVal2 = (BasicDBObject) MongoDbManager.getCollection(database, collection)
                    .findOne();
            retVal = returnVal2.toString();
        }
    } catch (Exception e) {
        throw new RuntimeException("Document not found or permission issue (no overlapping communities)");
    }
    return retVal;
}
From source file:com.ikanow.infinit.e.application.handlers.polls.LogstashSourceDeletionPollHandler.java
License:Apache License
@Override public void performPoll() { boolean isSlave = false; if (null == LOGSTASH_CONFIG) { // (static memory not yet initialized) try {/* w ww . j a v a 2 s.com*/ Thread.sleep(1000); // (extend the sleep time a bit) } catch (Exception e) { } return; } File logstashDirectory = new File(LOGSTASH_CONFIG); String slaveHostname = null; if (!logstashDirectory.isDirectory() || !logstashDirectory.canRead() || !logstashDirectory.canWrite()) { logstashDirectory = new File(LOGSTASH_CONFIG_DISTRIBUTED); isSlave = true; if (!logstashDirectory.isDirectory() || !logstashDirectory.canRead() || !logstashDirectory.canWrite()) { try { Thread.sleep(10000); // (extend the sleep time a bit) } catch (Exception e) { } return; } try { slaveHostname = java.net.InetAddress.getLocalHost().getHostName(); } catch (Exception e) { // too complex if we don't have a hostname, just return return; } } // Deletion of distributed sources requires some co-ordination, we'll do it in master if (isSlave) { // register my existence BasicDBObject existence = new BasicDBObject("_id", slaveHostname); existence.put("ping", new Date()); DbManager.getIngest().getLogHarvesterSlaves().save(existence); } //TESTED (by hand) else { // MASTER: clear out old slaves // (if it hasn't pinged for more than 30 minutes) long now = new Date().getTime(); BasicDBObject deadSlaveQuery = new BasicDBObject("ping", new BasicDBObject(DbManager.lt_, new Date(now - 1000L * 1800L))); boolean found = false; DBCursor dbc = DbManager.getIngest().getLogHarvesterSlaves().find(deadSlaveQuery); while (dbc.hasNext()) { BasicDBObject deadSlave = (BasicDBObject) dbc.next(); found = true; String hostname = deadSlave.getString("_id"); if (null != hostname) { DbManager.getIngest().getLogHarvesterQ().remove(new BasicDBObject("forSlave", hostname)); _logger.info("Removing unresponsive slave host=" + hostname); } } if (found) { DbManager.getIngest().getLogHarvesterSlaves().remove(deadSlaveQuery); } } //TESTED (by hand) // Read delete elements from the Q... if (null == _logHarvesterQ) { _logHarvesterQ = new MongoQueue(DbManager.getIngest().getLogHarvesterQ().getDB().getName(), DbManager.getIngest().getLogHarvesterQ().getName()); } BasicDBObject queueQuery = new BasicDBObject("deleteOnlyCommunityId", new BasicDBObject(DbManager.exists_, true)); if (!isSlave) { // only get master messages queueQuery.put("forSlave", new BasicDBObject(DbManager.exists_, false)); } else { // only get messages intended for me queueQuery.put("forSlave", slaveHostname); } DBObject nextElement = _logHarvesterQ.pop(queueQuery); LinkedList<TestLogstashExtractorPojo> secondaryQueue = new LinkedList<TestLogstashExtractorPojo>(); LinkedList<String> deleteAfterRestartQueue = new LinkedList<String>(); boolean deletedSources = false; boolean deletedSinceDbs = false; while (nextElement != null) { //DEBUG //System.out.println("HOST: " + slaveHostname + ": RECEIVED: " + nextElement.toString() + " FROM " + queueQuery); _logger.info("host=" + slaveHostname + " received=" + nextElement.toString() + " from=" + queueQuery); TestLogstashExtractorPojo testInfo = TestLogstashExtractorPojo.fromDb(nextElement, TestLogstashExtractorPojo.class); if (null == testInfo.sourceKey) { continue; // need a sourceKey parameter... } if (!isSlave) { // slaves don't need to delete anything from the index, only files secondaryQueue.add(testInfo); } //(end if master) try { // First off - need to remove the conf file and restart logstash if we're actually deleting this... 
boolean deletedSource = false; if ((null == testInfo.deleteDocsOnly) || !testInfo.deleteDocsOnly) { // (default = delete entire source) deletedSources = true; deletedSource = true; String fileToDelete = new StringBuffer(LOGSTASH_CONFIG).append(testInfo._id.toString()) .append(LOGSTASH_CONFIG_EXTENSION).toString(); boolean deleted = false; try { deleted = new File(fileToDelete).delete(); } catch (Exception e) { } //DEBUG //System.out.println("DELETED CONF FILE" + fileToDelete + " ? " + deleted); _logger.info("delete conf_file=" + fileToDelete + " success=" + deleted); } //TESTED (docs-only + source deletion) // If _not_ deleting the source, then do delete the sincedb file // (else let it get cleaned up separately - minimizes race conditions where the source starts ingesting again) String fileToDelete = new StringBuffer(LOGSTASH_WD).append(".sincedb_") .append(testInfo._id.toString()).toString(); if (!deletedSource) { boolean deleted = false; try { deleted = new File(fileToDelete).delete(); deletedSinceDbs |= deleted; } catch (Exception e) { } //DEBUG //System.out.println("DELETED SINCEDB" + fileToDelete + " ? " + deletedSinceDb); _logger.info("primary delete sincedb_file=" + fileToDelete + " success=" + deleted); } else { deleteAfterRestartQueue.add(fileToDelete); } //TESTED (primary + secondary deletes) } catch (Exception e) { //e.printStackTrace(); } // probably just doesn't exist // Get next element and carry on nextElement = _logHarvesterQ.pop(queueQuery); } //TESTED (end first loop over elements to delete) if (deletedSources || deletedSinceDbs) { // this file actually existed - need to restart the logstash unfortunately _logger.info("Restarting logstash, and sleeping until logstash is restarted"); try { new File(LOGSTASH_RESTART_FILE).createNewFile(); for (int i = 0; i < 12; ++i) { Thread.sleep(10L * 1000L); if (!new File(LOGSTASH_RESTART_FILE).exists()) { Thread.sleep(5L * 1000L); // (extra wait for it to shut down) break; // (early exit) } } } catch (Exception e) { } } //TESTED (from doc deletion and from src deletion) for (String fileToDelete : deleteAfterRestartQueue) { boolean deleted = false; try { deleted = new File(fileToDelete).delete(); } catch (Exception e) { } //DEBUG //System.out.println("DELETED SINCEDB" + fileToDelete + " ? " + deletedSinceDb); _logger.info("secondary delete sincedb_file=" + fileToDelete + " success=" + deleted); } //TESTED (primary and secondary deletion) for (TestLogstashExtractorPojo testInfo : secondaryQueue) { String commIdStr = testInfo.deleteOnlyCommunityId.toString(); // Get all the indexes that might need to be cleansed: ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(DUMMY_INDEX); // Stashed index ArrayList<String> indices = new ArrayList<String>(); String stashedIndex = "recs_" + commIdStr; ClusterStateResponse retVal = indexMgr.getRawClient().admin().cluster().prepareState() .setIndices(stashedIndex).setRoutingTable(false).setNodes(false).setListenerThreaded(false) .get(); if (!retVal.getState().getMetaData().getIndices().isEmpty()) { indices.add(stashedIndex); } // (else doesn't exist...) 
// Live indexes: String indexPattern = new StringBuffer("recs_t_").append(commIdStr).append("*").toString(); retVal = indexMgr.getRawClient().admin().cluster().prepareState().setIndices(indexPattern) .setRoutingTable(false).setNodes(false).setListenerThreaded(false).get(); for (IndexMetaData indexMetadata : retVal.getState().getMetaData()) { //DEBUG //System.out.println("INDEX=" + indexMetadata.index()); indices.add(indexMetadata.index()); } deleteSourceKeyRecords(indexMgr, indices.toArray(new String[0]), testInfo.sourceKey); _logger.info("Deleted key=" + testInfo.sourceKey + " from indexes=" + ArrayUtils.toString(indices.toArray())); // Now I've deleted, go and distribute the deletion messages to the slaves if ((null != testInfo.distributed) && testInfo.distributed) { // Copy into the slaves' queue DBCursor dbc = DbManager.getIngest().getLogHarvesterSlaves().find(); while (dbc.hasNext()) { BasicDBObject slave = (BasicDBObject) dbc.next(); testInfo.forSlave = slave.getString("_id"); _logHarvesterQ.push(testInfo.toDb()); testInfo.forSlave = null; //DEBUG //System.out.println("DISTRIBUTING DELETION MESSAGE TO " + slave.toString()); _logger.info("distributing deletion message to host=" + slave.toString()); } } //TESTED (by hand) } //(end loop over secondary queue, ie to actually delete the indexes) }
From source file:com.ikanow.infinit.e.application.utils.LogstashConfigUtils.java
License:Open Source License
public static void main(String[] args) throws IOException { System.out.println(Arrays.toString(args)); Globals.setIdentity(com.ikanow.infinit.e.data_model.Globals.Identity.IDENTITY_API); Globals.overrideConfigLocation(args[0]); // 1) Errored sources - things that break the formatting StringBuffer errors = new StringBuffer(); String testName;//from w w w. ja v a2s .c o m // 1.1) {} mismatch 1 //a errors.setLength(0); testName = "error_1_1a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch (})")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //b errors.setLength(0); testName = "error_1_1b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch (})")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //c errors.setLength(0); testName = "error_1_1c"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch (})")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.2) {} mismatch 2 //a errors.setLength(0); testName = "error_1_2a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().startsWith("{} Mismatch ({)")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.3) multiple input/filter blocks // 1.3a) input errors.setLength(0); testName = "error_1_3a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Multiple input or filter blocks: input")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.3b) filter errors.setLength(0); testName = "error_1_3b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Multiple input or filter blocks: filter")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.4) unrecognized blocks // a output - special case errors.setLength(0); testName = "error_1_4a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString() .equals("Not allowed output blocks - these are appended automatically by the logstash harvester")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // b errors.setLength(0); testName = "error_1_4b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Unrecognized processing block: something_random")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 1.5) fields/sub-elements that are not permitted // a ... sincedb_path errors.setLength(0); testName = "error_1_5a"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Not allowed sincedb_path in input.* block")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // b ... 
filter as sub-path of input errors.setLength(0); testName = "error_1_5b"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Not allowed sub-elements of input called 'filter' (1)")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // c ... filter as sub-path of sub-element of input errors.setLength(0); testName = "error_1_5c"; if (null != parseLogstashConfig(getTestFile(testName), errors)) { System.out.println("**** FAIL " + testName); } else if (!errors.toString().equals("Not allowed sub-elements of input called 'filter' (2)")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 2) Valid formatted source BasicDBObject retVal; String output; String inputName; // (for re-using config files across text) //2.1) errors.setLength(0); testName = "success_2_1"; if (null == (retVal = parseLogstashConfig(getTestFile(testName), errors))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } else if (!retVal.toString().equals( "{ \"input\" : { \"file\" : [ { \"path\" : { } , \"start_position\" : { } , \"type\" : { } , \"codec.multiline\" : { }}]} , \"filter\" : { \"csv\" : [ { \"columns\" : { }}] , \"drop\" : [ { }] , \"mutate\" : [ { \"convert\" : { }} , { \"add_fields\" : { }} , { \"rename\" : { }}] , \"date\" : [ { \"timezone\" : { } , \"match\" : { }}] , \"geoip\" : [ { \"source\" : { } , \"fields\" : { }}]}}")) { System.out.println("**** FAIL " + testName + ": " + retVal.toString()); } //System.out.println("(val="+retVal+")"); // 2.2 errors.setLength(0); testName = "success_2_2"; if (null == (retVal = parseLogstashConfig(getTestFile(testName), errors))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } if (null == MongoDbUtil.getProperty(retVal, "filter.geoip.fields")) { System.out.println("**** FAIL " + testName + ": " + retVal); } //System.out.println(retVal); //2.3) - check that the sincedb is added correctly, plus the sourceKey manipulation // (USE success_2_1 for this) errors.setLength(0); testName = "inputs_2_3"; inputName = "success_2_3"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": errored: " + errors); } else { String outputToTest = output.replaceAll("[\n\r]", "\\\\n").replaceAll("\\s+", " "); String testAgainst = "input {\n\n file {\n sincedb_path => \"_XXX_DOTSINCEDB_XXX_\"\n\n\n path => \"/root/odin-poc-data/proxy_logs/may_known_cnc.csv\"\n\n start_position => beginning\n\n type => \"proxy_logs\"\n\n codec => multiline {\n\n pattern => \"^%{YEAR}-%{MONTHNUM}-%{MONTHDAY}%{DATA:summary}\"\n\n negate => true\n\n what => \"previous\"\n\n } \n\n add_field => [ \"sourceKey\", \"inputs_2_3\"] \n\n}\n\n}\n\n\n\nfilter { \n if [sourceKey] == \"inputs_2_3\" { \n\n \n\n if [type] == \"proxy_logs\" {\n\n csv {\n\n columns => 
[\"Device_Name\",\"SimpleDate\",\"Event_#Date\",\"Source_IP\",\"Source_Port\",\"Destination_IP\",\"Destination_Port\",\"Protocol\",\"Vendor_Alert\",\"MSS_Action\",\"Logging_Device_IP\",\"Application\",\"Bytes_Received\",\"Bytes_Sent\",\"Dest._Country\",\"Message\",\"Message_Type\",\"MSS_Log_Source_IP\",\"MSS_Log_Source_Type\",\"MSS_Log_Source_UUID\",\"network_protocol_id\",\"OS_Type\",\"PIX_Main-Code\",\"PIX_Sub-Code\",\"Port\",\"Product_ID\",\"Product\",\"Rule\",\"Rule_Identifier\",\"Sensor_Name\",\"Class\",\"Translate_Destination_IP\",\"Translate_Destination_Port\",\"Translate_Source_IP\"]\n\n }\n\n if [Device_Name] == \"Device Name\" {\n\n drop {}\n\n }\n\n mutate {\n\n convert => [ \"Bytes_Received\", \"integer\" ]\n\n convert => [ \"Bytes_Sent\", \"integer\" ]\n\n }\n\n date {\n\n timezone => \"Europe/London\"\n\n match => [ \"Event_Date\" , \"yyyy-MM-dd'T'HH:mm:ss\" ]\n\n }\n\n geoip {\n\n source => \"Destination_IP\"\n\n fields => [\"timezone\",\"location\",\"latitude\",\"longitude\"]\n\n }\n\n }\n\n\n\n mutate { update => [ \"sourceKey\", \"inputs_2_3\"] } \n}\n}\n"; testAgainst = testAgainst.replaceAll("[\n\r]", "\\\\n").replaceAll("\\s+", " "); if (!outputToTest.equals(testAgainst)) { System.out.println("**** FAIL " + testName + ": " + output); } } // 3) Valid formatted source, access to restricted types // 3.1) input // a) restricted - admin // (USE success_2_1 for this) errors.setLength(0); testName = "inputs_3_1a"; inputName = "success_2_1"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString() .startsWith("Security error, non-admin not allowed input type file, allowed options: ")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // b) restricted - non admin // (USE success_2_1 for this) errors.setLength(0); testName = "inputs_3_1b"; inputName = "success_2_1"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // c) unrestricted - non admin errors.setLength(0); testName = "inputs_3_1c"; inputName = "inputs_3_1c"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //System.out.println("(val="+output+")"); // d) no input at all errors.setLength(0); testName = "inputs_3_1d"; inputName = "inputs_3_1d"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString().startsWith( "Invalid input format, should be 'input { INPUT_TYPE { ... 
} }' (only one INPUT_TYPE) and also contain a filter, no \"s around them.")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } // 3.2) filter // a) restricted - admin errors.setLength(0); testName = "filters_3_2a"; inputName = "filters_3_2a"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString() .startsWith("Security error, non-admin not allowed filter type elasticsearch, allowed options: ")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //System.out.println("(err="+errors.toString()+")"); // b) restricted - non admin // (USE filters_3_2a for this) errors.setLength(0); testName = "filters_3_2a"; inputName = "filters_3_2a"; if (null == (output = validateLogstashInput(testName, getTestFile(inputName), errors, true))) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } //System.out.println("(val="+output+")"); // c) unrestricted - non admin // (implicitly tested via 3.1bc) // d) no filter at all errors.setLength(0); testName = "filters_3_2d"; inputName = "filters_3_2d"; if (null != (output = validateLogstashInput(testName, getTestFile(inputName), errors, false))) { System.out.println("**** FAIL " + testName + ": Should have errored: " + output); } else if (!errors.toString().startsWith( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them.")) { System.out.println("**** FAIL " + testName + ": " + errors.toString()); } }
From source file:com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java
License:Open Source License
private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable, String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer, String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue, String arguments) throws IOException { String dbserver = prop_general.getDatabaseServer(); output = outputDatabase + "." + tempOutputCollection; int nSplits = 8; int nDocsPerSplit = 12500; //add communities to query if this is not a custom table if (!isCustomTable) { // Start with the old query: BasicDBObject oldQueryObj = null; if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else {/* ww w . j a va 2 s . c om*/ oldQueryObj = new BasicDBObject(); } // Community Ids aren't indexed in the metadata collection, but source keys are, so we need to transform to that BasicDBObject keyQuery = new BasicDBObject(SourcePojo.communityIds_, new BasicDBObject(DbManager.in_, communityIds)); boolean bAdminOverride = false; if (oldQueryObj.containsField("admin")) { // For testing only... if (1 == communityIds.size()) { ObjectId communityId = communityIds.get(0); if (RESTTools.adminLookup(communityId.toString())) { bAdminOverride = true; if (oldQueryObj.containsField("max.splits")) { nSplits = oldQueryObj.getInt("max.splits"); } if (oldQueryObj.containsField("max.docs.per.split")) { nDocsPerSplit = oldQueryObj.getInt("max.docs.per.split"); } } } } //(end diagnostic/benchmarking/test code for admins only part 1) if (bAdminOverride) { oldQueryObj = (BasicDBObject) oldQueryObj.get("admin"); //(end diagnostic/benchmarking/test code for admins only part 2) } else if (oldQueryObj.containsField(DocumentPojo.sourceKey_) || input.startsWith("feature.")) { // Source Key specified by user, stick communityIds check in for security oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds)); } else { // Source key not specified by user, transform communities->sourcekeys BasicDBObject keyFields = new BasicDBObject(SourcePojo.key_, 1); DBCursor dbc = MongoDbManager.getIngest().getSource().find(keyQuery, keyFields); if (dbc.count() > 500) { // (too many source keys let's keep the query size sensible...) oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds)); } else { HashSet<String> sourceKeys = new HashSet<String>(); while (dbc.hasNext()) { DBObject dbo = dbc.next(); String sourceKey = (String) dbo.get(SourcePojo.key_); if (null != sourceKey) { sourceKeys.add(sourceKey); } } if (sourceKeys.isEmpty()) { // query returns empty throw new RuntimeException("Communities contain no sources"); } BasicDBObject newQueryClauseObj = new BasicDBObject(DbManager.in_, sourceKeys); // Now combine the queries... oldQueryObj.put(DocumentPojo.sourceKey_, newQueryClauseObj); } // (end if too many source keys across the communities) } //(end if need to break source keys down into communities) query = oldQueryObj.toString(); } else { //get the custom table (and database) input = getCustomDbAndCollection(input); } if (arguments == null) arguments = ""; // Generic configuration out.write("<?xml version=\"1.0\"?>\n<configuration>"); // Mongo specific configuration out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title + "</value></property>" + "\n\t<property><!-- run the job verbosely ? 
--><name>mongo.job.verbose</name><value>true</value></property>" + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>" + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://" + dbserver + "/" + input + "</value></property>" + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://" + dbserver + "/" + output + "</value> </property>" + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>" + query + "</value></property>" + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>" + ((fields == null) ? ("") : fields) + "</value></property>" + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>" + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>0</value><!-- 0 == no limit --></property>" + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? --><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>" + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper + "</value></property>" + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer + "</value></property>" + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>" + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.mongodb.hadoop.MongoOutputFormat</value></property>" + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>" + outputKey + "</value></property>" + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>" + outputValue + "</value></property>" + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value></value></property>" + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value></value></property>" + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>" + combiner + "</value></property>" + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>" + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>" + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>"); // Infinit.e specific configuration out.write("\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>" + StringEscapeUtils.escapeXml(arguments) + "</value></property>" + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>" + nSplits + "</value></property>" + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>" + nDocsPerSplit + "</value></property>"); // Closing thoughts: out.write("\n</configuration>"); out.flush(); out.close(); }
From source file:com.ikanow.infinit.e.data_model.api.ResponsePojo.java
License:Apache License
public static ResponsePojo fromDb(BasicDBObject bson) {
    BasicDBObject bson2 = new BasicDBObject();
    bson2.put("stats", bson.get("stats"));
    bson2.put("response", bson.get("response"));
    ResponsePojo rp = ResponsePojo.fromApi(bson2.toString(), ResponsePojo.class);

    // Now all the elements!
    Object evtTimeline = null, facets = null, times = null, entities = null, events = null, facts = null,
            summaries = null, sources = null, sourceMetaTags = null, sourceMetaTypes = null, moments = null,
            other = null;
    evtTimeline = bson.get("eventsTimeline");
    facets = bson.get("facets");
    times = bson.get("times");
    entities = bson.get("entities");
    events = bson.get("events");
    facts = bson.get("facts");
    summaries = bson.get("summaries");
    sources = bson.get("sources");
    sourceMetaTags = bson.get("sourceMetatags");
    sourceMetaTypes = bson.get("sourceMetaTypes");
    moments = bson.get("moments");
    other = bson.get("other");

    rp.setEventsTimeline(evtTimeline);
    rp.setFacets(facets);
    rp.setTimes(times, rp.getTimeInterval() == null ? 0 : rp.getTimeInterval());
    rp.setEntities(entities);
    rp.setEvents(events);
    rp.setFacts(facts);
    rp.setSummaries(summaries);
    rp.setSources(sources);
    rp.setSourceMetaTags(sourceMetaTags);
    rp.setSourceMetaTypes(sourceMetaTypes);
    rp.setMoments(moments, rp.getMomentInterval());
    rp.setOther(other);

    // The main data object is discarded in the original fromApi() call, so put it back now
    Object docData = bson.get("data");
    if (null != docData) {
        rp.setData((BasicDBList) docData, (BasePojoApiMap<BasicDBList>) null);
    } else {
        // (ensure there's always an empty list)
        rp.setData(new ArrayList<BasicDBObject>(0), (BasePojoApiMap<BasicDBObject>) null);
    }
    return rp;
}
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) {
    final Configuration hadoopConfiguration = context.getConfiguration();
    final InfiniteMongoConfig conf = new InfiniteMongoConfig(hadoopConfiguration);
    List<InputSplit> splits = InfiniteMongoSplitter.calculateSplits(conf);
    if (conf.getSelfMerge() != null) {
        // check if we need to grab existing records and add them to the splits
        final Configuration existingConfiguration = context.getConfiguration();
        existingConfiguration.set("mongo.input.uri", conf.getSelfMerge());
        BasicDBObject query = new BasicDBObject();
        // add on this query to only get items previous to now if no reducer is specified (otherwise
        // we will leak any items we map on the first run back in before this split runs)
        if (context.getNumReduceTasks() == 0)
            query.put("_id", new BasicDBObject(MongoDbManager.lt_, new ObjectId()));
        existingConfiguration.set("mongo.input.query", query.toString());
        final InfiniteMongoConfig existingConf = new InfiniteMongoConfig(existingConfiguration);
        splits.addAll(InfiniteMongoSplitter.calculateSplits(existingConf));
    }
    return splits;
}
From source file:com.ikanow.infinit.e.data_model.driver.InfiniteDriver.java
License:Apache License
public Set<String> updateAliases(Collection<EntityFeaturePojo> aliasesToUpdate, String communityIdStr,
        boolean bUpsert, Map<String, List<SharePojo>> aliasMapping, ResponseObject response) {
    if (null == aliasMapping) {
        aliasMapping = new HashMap<String, List<SharePojo>>();
        this.getAliases(communityIdStr, aliasMapping, response);
        if (!response.isSuccess()) {
            return null;
        }
    } //TESTED
    Map<ObjectId, BasicDBObject> shareContentCache = new HashMap<ObjectId, BasicDBObject>();
    List<SharePojo> sharesToUpdate = new LinkedList<SharePojo>();

    // Step through the aliases, update the content
    // Loop 1 update
    SharePojo shareForNewAliases = null;
    Set<String> erroredAliases = new HashSet<String>();
    HashMultimap<ObjectId, String> shareToAliasMapping = HashMultimap.create();
    for (EntityFeaturePojo alias : aliasesToUpdate) {
        List<SharePojo> sharesForThisAlias = aliasMapping.get(alias.getIndex());
        if ((null == sharesForThisAlias) && bUpsert) { // This is a new alias and not ignoring upserts
            if (null == shareForNewAliases) { // Haven't yet assigned such a share
                shareForNewAliases = this.upsertSharePrep(communityIdStr, shareContentCache, aliasMapping);
                if (null == shareForNewAliases) {
                    erroredAliases.add(alias.getIndex());
                    continue;
                }
                sharesToUpdate.add(shareForNewAliases);
            }
            BasicDBObject shareContent = shareContentCache.get(shareForNewAliases.get_id()); // (exists by construction)
            shareContent.put(alias.getIndex(), alias.toDb());
            shareToAliasMapping.put(shareForNewAliases.get_id(), alias.getIndex());
        } //TESTED
        else if (null != sharesForThisAlias) {
            for (SharePojo share : sharesForThisAlias) {
                BasicDBObject shareContent = shareContentCache.get(share.get_id());
                if (null == shareContent) {
                    try {
                        String json = share.getShare();
                        shareContent = (BasicDBObject) JSON.parse(json);
                        shareContentCache.put(share.get_id(), shareContent);
                        sharesToUpdate.add(share);
                    } catch (Exception e) {
                        erroredAliases.add(alias.getIndex());
                    }
                } //TESTED
                shareContent.put(alias.getIndex(), alias.toDb());
                shareToAliasMapping.put(share.get_id(), alias.getIndex());
            } //TESTED
        } else {
            erroredAliases.add(alias.getIndex());
        }
        // end loop over updating shares
    } //end loop over aliases

    // Loop 2 now update all the shares
    boolean bSucceededUpdatingSomething = false;
    for (SharePojo share : sharesToUpdate) {
        BasicDBObject shareContent = shareContentCache.get(share.get_id()); // (exists by construction)
        String shareIdStr = share.get_id().toString();
        this.updateShareJSON(shareIdStr, share.getTitle(), share.getDescription(), "infinite-entity-alias",
                shareContent.toString(), response);
        bSucceededUpdatingSomething |= response.isSuccess();
        if (!response.isSuccess()) {
            Set<String> failedAliases = shareToAliasMapping.get(share.get_id());
            if (null != failedAliases) {
                erroredAliases.addAll(failedAliases);
            }
        }
    } //TESTED
    response.setSuccess(bSucceededUpdatingSomething);
    return erroredAliases;
}
From source file:com.ikanow.infinit.e.data_model.driver.InfiniteDriver.java
License:Apache License
public Set<String> removeAliases(Collection<String> aliasesToRemove, String communityIdStr,
        Map<String, List<SharePojo>> aliasMapping, ResponseObject response) {
    if (null == aliasMapping) {
        aliasMapping = new HashMap<String, List<SharePojo>>();
        this.getAliases(communityIdStr, aliasMapping, response);
        if (!response.isSuccess()) {
            return null;
        }
    } //TESTED
    Map<ObjectId, BasicDBObject> shareContentCache = new HashMap<ObjectId, BasicDBObject>();
    List<SharePojo> sharesToUpdate = new LinkedList<SharePojo>();

    // Step through the aliases, update the content
    // Loop 1 update
    Set<String> erroredAliases = new HashSet<String>();
    HashMultimap<ObjectId, String> shareToAliasMapping = HashMultimap.create();
    for (String alias : aliasesToRemove) {
        List<SharePojo> sharesForThisAlias = aliasMapping.get(alias);
        if (null != sharesForThisAlias) {
            for (SharePojo share : sharesForThisAlias) {
                BasicDBObject shareContent = shareContentCache.get(share.get_id());
                if (null == shareContent) {
                    try {
                        String json = share.getShare();
                        shareContent = (BasicDBObject) JSON.parse(json);
                        shareContentCache.put(share.get_id(), shareContent);
                        sharesToUpdate.add(share);
                    } catch (Exception e) {
                        erroredAliases.add(alias);
                    }
                } //TESTED
                shareContent.remove(alias);
                shareToAliasMapping.put(share.get_id(), alias);
            } //TESTED
        }
        // end loop over updating shares
    } //end loop over aliases

    // Loop 2 now update all the shares
    boolean bSucceededUpdatingSomething = false;
    for (SharePojo share : sharesToUpdate) {
        BasicDBObject shareContent = shareContentCache.get(share.get_id()); // (exists by construction)
        String shareIdStr = share.get_id().toString();
        if (shareContent.isEmpty()) { // Remove the share
            this.removeShare(shareIdStr, response);
            if (!response.isSuccess()) {
                Set<String> failedAliases = shareToAliasMapping.get(share.get_id());
                if (null != failedAliases) {
                    erroredAliases.addAll(failedAliases);
                }
            }
        } //TESTED
        else {
            this.updateShareJSON(shareIdStr, share.getTitle(), share.getDescription(), "infinite-entity-alias",
                    shareContent.toString(), response);
            bSucceededUpdatingSomething |= response.isSuccess();
            if (!response.isSuccess()) {
                Set<String> failedAliases = shareToAliasMapping.get(share.get_id());
                if (null != failedAliases) {
                    erroredAliases.addAll(failedAliases);
                }
            }
        } //TESTED
    } //TESTED
    response.setSuccess(bSucceededUpdatingSomething);
    return erroredAliases;
}
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License:Open Source License
private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable, String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer, String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue, String arguments, Boolean incrementalMode, ObjectId userId, Boolean selfMerge, String originalOutputCollection, Boolean appendResults) throws IOException { String dbserver = prop_general.getDatabaseServer(); output = outputDatabase + "." + tempOutputCollection; boolean isAdmin = AuthUtils.isAdmin(userId); int nSplits = 8; int nDocsPerSplit = 12500; //add communities to query if this is not a custom table BasicDBObject oldQueryObj = null; BasicDBObject srcTags = null;/*from w w w. ja v a 2 s . c om*/ // Start with the old query: if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else { oldQueryObj = new BasicDBObject(); } boolean elasticsearchQuery = oldQueryObj.containsField("qt") && !isCustomTable; int nLimit = 0; if (oldQueryObj.containsField("$limit")) { nLimit = oldQueryObj.getInt("$limit"); oldQueryObj.remove("$limit"); } if (oldQueryObj.containsField("$splits")) { nSplits = oldQueryObj.getInt("$splits"); oldQueryObj.remove("$splits"); } if (oldQueryObj.containsField("$srctags")) { srcTags = new BasicDBObject(SourcePojo.tags_, oldQueryObj.get("$srctags")); oldQueryObj.remove("$srctags"); } if (bLocalMode) { // If in local mode, then set this to a large number so we always run inside our limit/split version // (since for some reason MongoInputFormat seems to fail on large collections) nSplits = InfiniteMongoSplitter.MAX_SPLITS; } if (oldQueryObj.containsField("$docsPerSplit")) { nDocsPerSplit = oldQueryObj.getInt("$docsPerSplit"); oldQueryObj.remove("$docsPerSplit"); } oldQueryObj.remove("$fields"); oldQueryObj.remove("$output"); oldQueryObj.remove("$reducers"); String mapperKeyClass = oldQueryObj.getString("$mapper_key_class", ""); String mapperValueClass = oldQueryObj.getString("$mapper_value_class", ""); oldQueryObj.remove("$mapper_key_class"); oldQueryObj.remove("$mapper_value_class"); String cacheList = null; Object cacheObj = oldQueryObj.get("$caches"); if (null != cacheObj) { cacheList = cacheObj.toString(); // (either array of strings, or single string) if (!cacheList.startsWith("[")) { cacheList = "[" + cacheList + "]"; // ("must" now be valid array) } oldQueryObj.remove("$caches"); } //TESTED if (null != nDebugLimit) { // (debug mode override) nLimit = nDebugLimit; } boolean tmpIncMode = (null != incrementalMode) && incrementalMode; Date fromOverride = null; Date toOverride = null; Object fromOverrideObj = oldQueryObj.remove("$tmin"); Object toOverrideObj = oldQueryObj.remove("$tmax"); if (null != fromOverrideObj) { fromOverride = InfiniteHadoopUtils.dateStringFromObject(fromOverrideObj, true); } if (null != toOverrideObj) { toOverride = InfiniteHadoopUtils.dateStringFromObject(toOverrideObj, false); } if (!isCustomTable) { if (elasticsearchQuery) { oldQueryObj.put("communityIds", communityIds); //tmin/tmax not supported - already have that capability as part of the query } else { if (input.equals("feature.temporal")) { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("value.maxTime", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, true)); } //TESTED oldQueryObj.put("_id.c", new BasicDBObject(DbManager.in_, communityIds)); } else { oldQueryObj.put(DocumentPojo.communityId_, new 
BasicDBObject(DbManager.in_, communityIds)); if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED if (input.equals("doc_metadata.metadata")) { oldQueryObj.put(DocumentPojo.index_, new BasicDBObject(DbManager.ne_, "?DEL?")); // (ensures not soft-deleted) } } } } else { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED //get the custom table (and database) input = CustomOutputManager.getCustomDbAndCollection(input); } query = oldQueryObj.toString(); if (arguments == null) arguments = ""; // Generic configuration out.write("<?xml version=\"1.0\"?>\n<configuration>"); // Mongo specific configuration out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title + "</value></property>" + "\n\t<property><!-- run the job verbosely ? --><name>mongo.job.verbose</name><value>true</value></property>" + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>" + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://" + dbserver + "/" + input + "</value></property>" + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://" + dbserver + "/" + output + "</value> </property>" + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>" + StringEscapeUtils.escapeXml(query) + "</value></property>" + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>" + ((fields == null) ? ("") : fields) + "</value></property>" + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>" + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>" + nLimit + "</value><!-- 0 == no limit --></property>" + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? 
--><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>" + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper + "</value></property>" + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer + "</value></property>" + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>" + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat</value></property>" + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>" + outputKey + "</value></property>" + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>" + outputValue + "</value></property>" + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value>" + mapperKeyClass + "</value></property>" + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value>" + mapperValueClass + "</value></property>" + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>" + combiner + "</value></property>" + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>" + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>" + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>"); // Infinit.e specific configuration out.write("\n\t<property><!-- User Arguments [optional] --><name>infinit.e.userid</name><value>" + StringEscapeUtils.escapeXml(userId.toString()) + "</value></property>" + "\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>" + StringEscapeUtils.escapeXml(arguments) + "</value></property>" + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>" + nSplits + "</value></property>" + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>" + nDocsPerSplit + "</value></property>" + "\n\t<property><!-- Infinit.e incremental mode [optional] --><name>update.incremental</name><value>" + tmpIncMode + "</value></property>" + "\n\t<property><!-- Infinit.e quick admin check [optional] --><name>infinit.e.is.admin</name><value>" + isAdmin + "</value></property>" + "\n\t<property><!-- Infinit.e userid [optional] --><name>infinit.e.userid</name><value>" + userId + "</value></property>"); if (null != cacheList) { out.write( "\n\t<property><!-- Infinit.e cache list [optional] --><name>infinit.e.cache.list</name><value>" + cacheList + "</value></property>"); } //TESTED if (null != srcTags) { out.write( "\n\t<property><!-- Infinit.e src tags filter [optional] --><name>infinit.e.source.tags.filter</name><value>" + srcTags.toString() + "</value></property>"); } if (null != selfMerge && selfMerge && originalOutputCollection != null) { originalOutputCollection = "mongodb://" + dbserver + "/" + outputDatabase + "." 
+ originalOutputCollection; out.write( "\n\t<property><!-- This jobs output collection for passing into the mapper along with input collection [optional] --><name>infinit.e.selfMerge</name><value>" + originalOutputCollection + "</value></property>"); } // Closing thoughts: out.write("\n</configuration>"); out.flush(); out.close(); }
From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteElasticsearchHadoopUtils.java
License:Apache License
public static void handleElasticsearchInput(CustomMapReduceJobPojo job, Configuration config, BasicDBObject advancedConfigurationDbo) { // Pull out type list: Object o = advancedConfigurationDbo.remove("$types"); String[] types = null;// w ww . j a va2 s. c o m if (null != o) { if (o instanceof BasicDBList) { types = ((BasicDBList) o).toArray(new String[0]); } else if (o instanceof String) { types = ((String) o).split("\\s*,\\s*"); } } //TESTED (by hand) //QUERY: // Date override: Date fromOverride = null; Date toOverride = null; Object fromOverrideObj = advancedConfigurationDbo.remove("$tmin"); Object toOverrideObj = advancedConfigurationDbo.remove("$tmax"); if (null != fromOverrideObj) { fromOverride = InfiniteHadoopUtils.dateStringFromObject(fromOverrideObj, true); } if (null != toOverrideObj) { toOverride = InfiniteHadoopUtils.dateStringFromObject(toOverrideObj, false); } Boolean streaming = null; Object streamingObj = advancedConfigurationDbo.remove("$streaming"); if (streamingObj instanceof Boolean) { streaming = (Boolean) streamingObj; } //DEBUG //System.out.println("QUERY = " + advancedConfigurationDbo.toString()); BasicDBObject newQuery = new BasicDBObject(); Object queryObj = advancedConfigurationDbo.get("query"); if (queryObj instanceof String) { config.set("es.query", queryObj.toString()); // URL version) if ((null != fromOverride) || (null != toOverride)) { throw new RuntimeException( "Can't specify $tmin/$tmax shortcut in conjunction with 'URL' query type"); } //TESTED } else if (null != queryObj) { newQuery.put("query", queryObj); Object filterObj = advancedConfigurationDbo.get("filter"); if (null != filterObj) newQuery.put("filter", filterObj); // (doesn't matter if it doesn't exist) Object fieldsObj = advancedConfigurationDbo.get("fields"); if (null != fieldsObj) newQuery.put("fields", fieldsObj); // (doesn't matter if it doesn't exist) Object sizeObj = advancedConfigurationDbo.get("size"); if (null != sizeObj) newQuery.put("size", sizeObj); // (doesn't matter if it doesn't exist) if ((null != fromOverride) || (null != toOverride)) { if (null == filterObj) { BasicDBObject filterRangeParamsDbo = new BasicDBObject(); if (null != fromOverride) { filterRangeParamsDbo.put("gte", fromOverride.getTime()); } if (null != toOverride) { filterRangeParamsDbo.put("lte", toOverride.getTime()); } BasicDBObject filterRangeDbo = new BasicDBObject("@timestamp", filterRangeParamsDbo); BasicDBObject filterDbo = new BasicDBObject("range", filterRangeDbo); newQuery.put("filter", filterDbo); } else { // combine filter throw new RuntimeException( "Can't (currently) specify $tmin/$tmax shortcut in conjunction with filter"); } //TESTED } config.set("es.query", newQuery.toString()); } //(else no query == match all) //COMMUNITIES Pattern dateRegex = null; ThreadSafeSimpleDateFormat tssdf = null; if ((null != fromOverride) || (null != toOverride)) { dateRegex = Pattern.compile("[0-9]{4}[.][0-9]{2}[.][0-9]{2}"); tssdf = new ThreadSafeSimpleDateFormat("yyyy.MM.dd"); } //TESTED StringBuffer overallIndexNames = new StringBuffer(); for (ObjectId commId : job.communityIds) { StringBuffer indexNames = new StringBuffer(); //TODO (INF-2641): need to handle: //c) anyway to sub-query?! (look for communityIds term?!) 
if (null == streaming) { indexNames.append("recs_*").append(commId.toString()).append("*"); } else if (streaming) { indexNames.append("recs_t_").append(commId.toString()).append("*"); } else {// !streaming indexNames.append("recs_").append(commId.toString()); } //TESTED StringBuffer decomposedIndexes = new StringBuffer(); boolean needDecomposedIndexes = false; HashSet<String> typesAdded = new HashSet<String>(); if ((null != types) && (null == fromOverride) && (null == toOverride)) { // (types manual, no date filtering - can be much simpler) for (String s : types) typesAdded.add(s); } else { // (All this oddly written code is to minimize the number of es types that get exposed, because // they are really badly behaved in terms of bw compatbility) if (null != types) { for (String s : types) typesAdded.add(s); } ElasticSearchManager indexMgr = ElasticSearchManager.getIndex("doc_dummy"); // (index guaranteed to exist) Object[] indexMetaObj = indexMgr.getRawClient().admin().cluster().prepareState() .setIndices(indexNames.toString()).setRoutingTable(false).setNodes(false) .setListenerThreaded(false).get().getState().getMetaData().getIndices().values().toArray(); if (null != indexMetaObj) for (Object oo : indexMetaObj) { IndexMetaData indexMeta = (IndexMetaData) oo; String indexName = indexMeta.getIndex(); if ((null != fromOverride) || (null != toOverride)) { //DEBUG //System.out.println("INDEX: " + indexName); Matcher m = dateRegex.matcher(indexName); if (m.find()) { try { Date d = tssdf.parse(m.group()); long endpoint = d.getTime() + 24L * 3600L * 1000L - 1; //DEBUG //System.out.println("***************** COMPARE: " + d + " FROM " + fromOverride + " TO " + toOverride + "..errr . " + m.group()); if (null != fromOverride) { if (endpoint < fromOverride.getTime()) { // no overlap on the left needDecomposedIndexes = true; continue; } } //TESTED if (null != toOverride) { if (d.getTime() > toOverride.getTime()) { // no overlap on the right needDecomposedIndexes = true; continue; } } //TESTED } catch (ParseException e) { // just carry on, odd index name, it happens needDecomposedIndexes = true; continue; } } } //TESTED (end loop over time checking) if (null == types) { Iterator<String> typesIt = indexMeta.getMappings().keysIt(); while (typesIt.hasNext()) { String type = typesIt.next(); if (!type.equals("_default_")) { typesAdded.add(type); } } } if (0 != decomposedIndexes.length()) { decomposedIndexes.append(','); } decomposedIndexes.append(indexName); } //(end loop over indexes) } //(end if need to derive the types from the indexes) if (needDecomposedIndexes) { // (because we filtered some indexes out) indexNames = decomposedIndexes; } if (0 == indexNames.length()) { continue; // nothing to do here... 
} int numTypesAdded = 0; if (typesAdded.isEmpty()) { // there doesn't seem to be any types associated with this set of indexes continue; // (ie don't add) } else for (String type : typesAdded) { if (numTypesAdded > 0) { indexNames.append(","); } else { indexNames.append("/"); } numTypesAdded++; indexNames.append(type); } if (overallIndexNames.length() > 0) { overallIndexNames.append(",,"); } overallIndexNames.append(indexNames); } //(end loop over community) //TESTED (by hand) if (0 == overallIndexNames.length()) { throw new RuntimeException( "Communities contained no types, either all indexes empty, or index is corrupt"); } //TESTED (by hand) //DEBUG //System.out.println("INDEXES = " + overallIndexNames.toString()); config.set("es.resource", overallIndexNames.toString()); config.set("es.index.read.missing.as.empty", "yes"); //proxy if running in debug mode: if (InfiniteEsInputFormat.LOCAL_DEBUG_MODE) { config.set("es.net.proxy.http.host", "localhost"); config.set("es.net.proxy.http.port", "8888"); } //TESTED (by hand) }