List of usage examples for com.mongodb BasicDBObject containsField
public boolean containsField(final String field)
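Before the project examples below, a minimal standalone sketch of what containsField reports (field names here are illustrative only, not taken from any of the projects):

import com.mongodb.BasicDBObject;

public class ContainsFieldDemo {
    public static void main(String[] args) {
        BasicDBObject doc = new BasicDBObject("title", "example")
                .append("count", 42)
                .append("optional", null);

        System.out.println(doc.containsField("title"));    // true
        System.out.println(doc.containsField("count"));    // true
        System.out.println(doc.containsField("optional")); // true: the key exists even though its value is null
        System.out.println(doc.containsField("missing"));  // false
    }
}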
From source file:com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java
License:Open Source License
private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable, String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer, String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue, String arguments) throws IOException { String dbserver = prop_general.getDatabaseServer(); output = outputDatabase + "." + tempOutputCollection; int nSplits = 8; int nDocsPerSplit = 12500; //add communities to query if this is not a custom table if (!isCustomTable) { // Start with the old query: BasicDBObject oldQueryObj = null; if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else {/*from w w w . java2 s . c o m*/ oldQueryObj = new BasicDBObject(); } // Community Ids aren't indexed in the metadata collection, but source keys are, so we need to transform to that BasicDBObject keyQuery = new BasicDBObject(SourcePojo.communityIds_, new BasicDBObject(DbManager.in_, communityIds)); boolean bAdminOverride = false; if (oldQueryObj.containsField("admin")) { // For testing only... if (1 == communityIds.size()) { ObjectId communityId = communityIds.get(0); if (RESTTools.adminLookup(communityId.toString())) { bAdminOverride = true; if (oldQueryObj.containsField("max.splits")) { nSplits = oldQueryObj.getInt("max.splits"); } if (oldQueryObj.containsField("max.docs.per.split")) { nDocsPerSplit = oldQueryObj.getInt("max.docs.per.split"); } } } } //(end diagnostic/benchmarking/test code for admins only part 1) if (bAdminOverride) { oldQueryObj = (BasicDBObject) oldQueryObj.get("admin"); //(end diagnostic/benchmarking/test code for admins only part 2) } else if (oldQueryObj.containsField(DocumentPojo.sourceKey_) || input.startsWith("feature.")) { // Source Key specified by user, stick communityIds check in for security oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds)); } else { // Source key not specified by user, transform communities->sourcekeys BasicDBObject keyFields = new BasicDBObject(SourcePojo.key_, 1); DBCursor dbc = MongoDbManager.getIngest().getSource().find(keyQuery, keyFields); if (dbc.count() > 500) { // (too many source keys let's keep the query size sensible...) oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds)); } else { HashSet<String> sourceKeys = new HashSet<String>(); while (dbc.hasNext()) { DBObject dbo = dbc.next(); String sourceKey = (String) dbo.get(SourcePojo.key_); if (null != sourceKey) { sourceKeys.add(sourceKey); } } if (sourceKeys.isEmpty()) { // query returns empty throw new RuntimeException("Communities contain no sources"); } BasicDBObject newQueryClauseObj = new BasicDBObject(DbManager.in_, sourceKeys); // Now combine the queries... oldQueryObj.put(DocumentPojo.sourceKey_, newQueryClauseObj); } // (end if too many source keys across the communities) } //(end if need to break source keys down into communities) query = oldQueryObj.toString(); } else { //get the custom table (and database) input = getCustomDbAndCollection(input); } if (arguments == null) arguments = ""; // Generic configuration out.write("<?xml version=\"1.0\"?>\n<configuration>"); // Mongo specific configuration out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title + "</value></property>" + "\n\t<property><!-- run the job verbosely ? 
--><name>mongo.job.verbose</name><value>true</value></property>" + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>" + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://" + dbserver + "/" + input + "</value></property>" + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://" + dbserver + "/" + output + "</value> </property>" + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>" + query + "</value></property>" + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>" + ((fields == null) ? ("") : fields) + "</value></property>" + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>" + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>0</value><!-- 0 == no limit --></property>" + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? --><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>" + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper + "</value></property>" + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer + "</value></property>" + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>" + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.mongodb.hadoop.MongoOutputFormat</value></property>" + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>" + outputKey + "</value></property>" + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>" + outputValue + "</value></property>" + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value></value></property>" + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value></value></property>" + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>" + combiner + "</value></property>" + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>" + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>" + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>"); // Infinit.e specific configuration out.write("\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>" + StringEscapeUtils.escapeXml(arguments) + "</value></property>" + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>" + nSplits + "</value></property>" + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>" + nDocsPerSplit + "</value></property>"); // Closing thoughts: out.write("\n</configuration>"); out.flush(); out.close(); }
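The pattern in the example above of probing a user-supplied query object for optional override fields before falling back to defaults can be reduced to a short sketch. This is a simplified standalone version, not the project's API; the defaults mirror the values in the example:

import com.mongodb.BasicDBObject;

public class SplitOverrides {
    static int[] readSplitOverrides(String queryJson) {
        BasicDBObject queryObj = queryJson.startsWith("{")
                ? (BasicDBObject) com.mongodb.util.JSON.parse(queryJson)
                : new BasicDBObject();
        int nSplits = 8;           // default, as in the example above
        int nDocsPerSplit = 12500; // default, as in the example above
        if (queryObj.containsField("max.splits")) {
            nSplits = queryObj.getInt("max.splits");
        }
        if (queryObj.containsField("max.docs.per.split")) {
            nDocsPerSplit = queryObj.getInt("max.docs.per.split");
        }
        return new int[] { nSplits, nDocsPerSplit };
    }
}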
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoSplitter.java
License:Apache License
@SuppressWarnings("unchecked") public static List<InputSplit> calculateSplits_phase2(InfiniteMongoConfig conf, BasicDBObject confQuery, boolean alwaysUseChunks, boolean newShardScheme, Integer splitDocCount) { alwaysUseChunks &= (conf.getMaxSplits() != MAX_SPLITS); // (in standalone mode, never use chunks) MongoURI uri = conf.getInputURI();//from w w w . j a v a 2s.com DBCollection coll = InfiniteMongoConfigUtil.getCollection(uri); if (conf.getLimit() > 0) { return calculateManualSplits(conf, confQuery, 1, conf.getLimit(), coll); } else { if (!alwaysUseChunks) { int nMaxCount = 1 + conf.getMaxDocsPerSplit() * conf.getMaxSplits(); int count = 0; if (null == splitDocCount) { if (nMaxCount <= 1) { nMaxCount = 0; } else { //DEBUG //System.out.println(coll.find(confQuery).limit(1).explain()); count = (int) coll.getCount(confQuery, null, nMaxCount, 0); if (0 == count) { return new ArrayList<InputSplit>(); } } //TESTED } else { count = splitDocCount; } //if maxdocssplit and maxsplits is set and there are less documents than splits*docspersplit then use the new splitter //otherwise use the old splitter if (conf.getMaxDocsPerSplit() > 0 && conf.getMaxSplits() > 0 && (count < nMaxCount)) { _logger.debug("Calculating splits manually"); int splits_needed = (count / conf.getMaxDocsPerSplit()) + 1; return calculateManualSplits(conf, confQuery, splits_needed, conf.getMaxDocsPerSplit(), coll); } //TESTED } if (newShardScheme && !confQuery.containsField(DocumentPojo.sourceKey_)) { // OK if we're going to do the sharded version then we will want to calculate splitPrecalculations_newShardScheme(confQuery, null); // (modifies confQuery if returns true) } //TESTED: checked did nothing when had sourceKey, added sourceKey when necessary (eg entities.index case) if (!newShardScheme) { // unlike new sharding scheme, in this case the query is fixed, so overwrite now: conf.setQuery(confQuery); } List<InputSplit> splits = MongoSplitter.calculateSplits(conf); // (unless manually set, like above, runs with the _original_ query) int initialSplitSize = splits.size(); // We have the MongoDB-calculated splits, now calculate their intersection vs the query @SuppressWarnings("rawtypes") Map<String, TreeSet<Comparable>> orderedArraySet = new HashMap<String, TreeSet<Comparable>>(); @SuppressWarnings("rawtypes") Map<String, NavigableSet<Comparable>> orderedArraySet_afterMin = new HashMap<String, NavigableSet<Comparable>>(); BasicDBObject originalQuery = confQuery; ArrayList<InputSplit> newsplits = new ArrayList<InputSplit>(splits.size()); Iterator<InputSplit> splitIt = splits.iterator(); while (splitIt.hasNext()) { try { orderedArraySet_afterMin.clear(); MongoInputSplit mongoSplit = (MongoInputSplit) splitIt.next(); BasicDBObject min = (BasicDBObject) mongoSplit.getQuerySpec().get("$min"); BasicDBObject max = (BasicDBObject) mongoSplit.getQuerySpec().get("$max"); //DEBUG //_logger.info("+----------------- NEW SPLIT ----------------: " + min + " /" + max); //System.out.println("+----------------- NEW SPLIT ----------------: " + min + " /" + max); if (null != min) { // How does the min fit in with the general query try { if (compareFields(-1, originalQuery, min, max, orderedArraySet, orderedArraySet_afterMin) < 0) { splitIt.remove(); continue; } } catch (Exception e) { } // do nothing probably just some comparable issue } //TESTED if (null != max) { // How does the min fit in with the general query try { if (compareFields(1, originalQuery, max, min, orderedArraySet, orderedArraySet_afterMin) > 0) { splitIt.remove(); continue; } } 
catch (Exception e) { } // do nothing probably just some comparable issue } //TESTED //DEBUG //_logger.info("(retained split)"); //System.out.println("(retained split)"); // (don't worry about edge cases, won't happen very often and will just result in a spurious empty mapper) //////////////////////////////// // Now some infinit.e specific processing... if (newShardScheme) { @SuppressWarnings("rawtypes") TreeSet<Comparable> sourceKeyOrderedArray = orderedArraySet.get(DocumentPojo.sourceKey_); if ((null != sourceKeyOrderedArray) && !sourceKeyOrderedArray.isEmpty()) { @SuppressWarnings("rawtypes") Comparable minSourceKey = null; Object minSourceKeyObj = (null == min) ? null : min.get(DocumentPojo.sourceKey_); if (minSourceKeyObj instanceof String) { minSourceKey = (String) minSourceKeyObj; } if (null == minSourceKey) { minSourceKey = sourceKeyOrderedArray.first(); } //TESTED @SuppressWarnings("rawtypes") Comparable maxSourceKey = null; Object maxSourceKeyObj = (null == max) ? null : max.get(DocumentPojo.sourceKey_); if (maxSourceKeyObj instanceof String) { maxSourceKey = (String) maxSourceKeyObj; } if (null == maxSourceKey) { maxSourceKey = sourceKeyOrderedArray.last(); } //TESTED DBObject splitQuery = mongoSplit.getQuerySpec(); BasicDBObject splitQueryQuery = new BasicDBObject( (BasicBSONObject) splitQuery.get("$query")); if (0 == minSourceKey.compareTo(maxSourceKey)) { // single matching sourceKEy splitQueryQuery.put(DocumentPojo.sourceKey_, maxSourceKey); } //TESTED (array of sources, only one matches) else { // multiple matching source keys splitQueryQuery.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeyOrderedArray.subSet(minSourceKey, true, maxSourceKey, true))); } //TESTED (array of sources, multiple match) newsplits.add( new InfiniteMongoInputSplit(mongoSplit, splitQueryQuery, conf.isNoTimeout())); } else { // original query is of sufficient simplicity newsplits.add( new InfiniteMongoInputSplit(mongoSplit, originalQuery, conf.isNoTimeout())); } //TESTED (no change to existing source) } //TESTED else { // old sharding scheme, remove min/max and replace with normal _id based query where possible DBObject splitQuery = mongoSplit.getQuerySpec(); // Step 1: create a query range for _id: BasicDBObject idRange = null; Object idMin = (min == null) ? null : min.get(DocumentPojo._id_); Object idMax = (max == null) ? 
null : max.get(DocumentPojo._id_); if (!(idMin instanceof ObjectId)) idMin = null; if (!(idMax instanceof ObjectId)) idMax = null; if ((null != idMin) || (null != idMax)) { idRange = new BasicDBObject(); if (null != idMin) { idRange.put(DbManager.gte_, idMin); } if (null != idMax) { idRange.put(DbManager.lt_, idMax); } } //TESTED // Step 2: merge with whatever we have at the moment: if (null != idRange) { BasicDBObject splitQueryQuery = new BasicDBObject( (BasicBSONObject) splitQuery.get("$query")); Object idQueryElement = splitQueryQuery.get(DocumentPojo._id_); boolean convertedAwayFromMinMax = false; if (null == idQueryElement) { // nice and easy, add _id range splitQueryQuery.put(DocumentPojo._id_, idRange); convertedAwayFromMinMax = true; } //TESTED else if (!splitQueryQuery.containsField(DbManager.and_)) { // OK we're going to just going to make life easy splitQueryQuery.remove(DocumentPojo._id_); splitQueryQuery.put(DbManager.and_, Arrays.asList(new BasicDBObject(DocumentPojo._id_, idQueryElement), new BasicDBObject(DocumentPojo._id_, idRange))); convertedAwayFromMinMax = true; } //TESTED // (else stick with min/max) if (convertedAwayFromMinMax) { // can construct an _id query splitQuery.removeField("$min"); splitQuery.removeField("$max"); } //TESTED splitQuery.put("$query", splitQueryQuery); } newsplits.add(new InfiniteMongoInputSplit(mongoSplit, conf.isNoTimeout())); } //TESTED } catch (Exception e) { //DEBUG //e.printStackTrace(); } // do nothing must be some other type of input split } //TESTED //DEBUG //System.out.println("Calculating splits via mongo-hadoop: " + initialSplitSize + " reduced to " + splits.size()); _logger.info("Calculating (converted) splits via mongo-hadoop: " + initialSplitSize + " reduced to " + newsplits.size()); return newsplits; } }
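One containsField use worth isolating from the splitter above is the "only add a constraint if the caller has not already supplied one" guard. A minimal sketch under that assumption; the field name and key list below are placeholders, not the project's constants:

import java.util.Arrays;
import java.util.List;
import com.mongodb.BasicDBObject;

public class QueryGuard {
    static void constrainIfAbsent(BasicDBObject query, String field, List<String> allowedKeys) {
        // Leave any user-supplied constraint on the field untouched;
        // otherwise restrict it to the allowed set.
        if (!query.containsField(field)) {
            query.put(field, new BasicDBObject("$in", allowedKeys));
        }
    }

    public static void main(String[] args) {
        BasicDBObject query = new BasicDBObject("title", "example");
        constrainIfAbsent(query, "sourceKey", Arrays.asList("src.a", "src.b"));
        System.out.println(query); // prints the query with the added $in clause
    }
}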
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoSplitter.java
License:Apache License
private static boolean isQueryNonTrivial(BasicDBObject query) {
    if ((query.size() > 3) || ((query.size() > 2) && !query.containsField(DocumentPojo.sourceKey_))) {
        return true;
    }
    return false;
}
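A hedged sketch of how a check like this might be used; the caller and the literal "sourceKey" string are assumptions for illustration, not taken from the splitter:

import com.mongodb.BasicDBObject;

public class SplitPlanner {
    // Same shape as the helper above, with the constant spelled out as an assumption.
    static boolean isQueryNonTrivial(BasicDBObject query) {
        return (query.size() > 3)
                || ((query.size() > 2) && !query.containsField("sourceKey"));
    }

    static String chooseStrategy(BasicDBObject query) {
        // Hypothetical caller: pick a splitting strategy based on query complexity.
        return isQueryNonTrivial(query) ? "count-per-split" : "chunk-boundaries";
    }
}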
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License:Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" }) public String runHadoopJob(CustomMapReduceJobPojo job, String tempJarLocation) throws IOException, SAXException, ParserConfigurationException { StringWriter xml = new StringWriter(); String outputCollection = job.outputCollectionTemp;// (non-append mode) if ((null != job.appendResults) && job.appendResults) outputCollection = job.outputCollection; // (append mode, write directly in....) else if (null != job.incrementalMode) job.incrementalMode = false; // (not allowed to be in incremental mode and not update mode) createConfigXML(xml, job.jobtitle, job.inputCollection, InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.INPUTFIELDS), job.isCustomTable, job.getOutputDatabase(), job._id.toString(), outputCollection, job.mapper, job.reducer, job.combiner,/*w w w . ja va 2 s.c om*/ InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.QUERY), job.communityIds, job.outputKey, job.outputValue, job.arguments, job.incrementalMode, job.submitterID, job.selfMerge, job.outputCollection, job.appendResults); ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader(); URLClassLoader child = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() }, savedClassLoader); Thread.currentThread().setContextClassLoader(child); // Check version: for now, any infinit.e.data_model with an VersionTest class is acceptable boolean dataModelLoaded = true; try { URLClassLoader versionTest = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() }, null); try { Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, versionTest); } catch (ClassNotFoundException e2) { //(this is fine, will use the cached version) dataModelLoaded = false; } if (dataModelLoaded) Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoVersionTest", true, versionTest); } catch (ClassNotFoundException e1) { throw new RuntimeException( "This JAR is compiled with too old a version of the data-model, please recompile with Jan 2014 (rc2) onwards"); } // Now load the XML into a configuration object: Configuration config = new Configuration(); // Add the client configuration overrides: if (!bLocalMode) { String hadoopConfigPath = props_custom.getHadoopConfigPath() + "/hadoop/"; config.addResource(new Path(hadoopConfigPath + "core-site.xml")); config.addResource(new Path(hadoopConfigPath + "mapred-site.xml")); config.addResource(new Path(hadoopConfigPath + "hadoop-site.xml")); } //TESTED try { DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(new ByteArrayInputStream(xml.toString().getBytes())); NodeList nList = doc.getElementsByTagName("property"); for (int temp = 0; temp < nList.getLength(); temp++) { Node nNode = nList.item(temp); if (nNode.getNodeType() == Node.ELEMENT_NODE) { Element eElement = (Element) nNode; String name = getTagValue("name", eElement); String value = getTagValue("value", eElement); if ((null != name) && (null != value)) { config.set(name, value); } } } } catch (Exception e) { throw new IOException(e.getMessage()); } // Some other config defaults: // (not sure if these are actually applied, or derived from the defaults - for some reason they don't appear in CDH's client config) config.set("mapred.map.tasks.speculative.execution", "false"); config.set("mapred.reduce.tasks.speculative.execution", "false"); // (default 
security is ignored here, have it set via HADOOP_TASKTRACKER_CONF in cloudera) // Now run the JAR file try { BasicDBObject advancedConfigurationDbo = null; try { advancedConfigurationDbo = (null != job.query) ? ((BasicDBObject) com.mongodb.util.JSON.parse(job.query)) : (new BasicDBObject()); } catch (Exception e) { advancedConfigurationDbo = new BasicDBObject(); } boolean esMode = advancedConfigurationDbo.containsField("qt") && !job.isCustomTable; if (esMode && !job.inputCollection.equals("doc_metadata.metadata")) { throw new RuntimeException( "Infinit.e Queries are only supported on doc_metadata - use MongoDB queries instead."); } config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing) if (bLocalMode) { // local job tracker and FS mode config.set("mapred.job.tracker", "local"); config.set("fs.default.name", "local"); } else { if (bTestMode) { // run job tracker locally but FS mode remotely config.set("mapred.job.tracker", "local"); } else { // normal job tracker String trackerUrl = HadoopUtils.getXMLProperty( props_custom.getHadoopConfigPath() + "/hadoop/mapred-site.xml", "mapred.job.tracker"); config.set("mapred.job.tracker", trackerUrl); } String fsUrl = HadoopUtils.getXMLProperty( props_custom.getHadoopConfigPath() + "/hadoop/core-site.xml", "fs.default.name"); config.set("fs.default.name", fsUrl); } if (!dataModelLoaded && !(bTestMode || bLocalMode)) { // If running distributed and no data model loaded then add ourselves Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/", "infinit.e.data_model.jar", config); DistributedCache.addFileToClassPath(jarToCache, config); jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/", "infinit.e.processing.custom.library.jar", config); DistributedCache.addFileToClassPath(jarToCache, config); } //TESTED // Debug scripts (only if they exist), and only in non local/test mode if (!bLocalMode && !bTestMode) { try { Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/", "custom_map_error_handler.sh", config); config.set("mapred.map.task.debug.script", "custom_map_error_handler.sh " + job.jobtitle); config.set("mapreduce.map.debug.script", "custom_map_error_handler.sh " + job.jobtitle); DistributedCache.createSymlink(config); DistributedCache.addCacheFile(scriptToCache.toUri(), config); } catch (Exception e) { } // just carry on try { Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/", "custom_reduce_error_handler.sh", config); config.set("mapred.reduce.task.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle); config.set("mapreduce.reduce.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle); DistributedCache.createSymlink(config); DistributedCache.addCacheFile(scriptToCache.toUri(), config); } catch (Exception e) { } // just carry on } //TODO (???): TOTEST // (need to do these 2 things here before the job is created, at which point the config class has been copied across) //1) Class<?> mapperClazz = Class.forName(job.mapper, true, child); if (ICustomInfiniteInternalEngine.class.isAssignableFrom(mapperClazz)) { // Special case: internal custom engine, so gets an additional integration hook ICustomInfiniteInternalEngine preActivities = (ICustomInfiniteInternalEngine) mapperClazz .newInstance(); preActivities.preTaskActivities(job._id, job.communityIds, config, !(bTestMode || bLocalMode)); } //TESTED //2) if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) 
{ // Need to download the GridFSZip file try { Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/unbundled/", "GridFSZipFile.jar", config); DistributedCache.addFileToClassPath(jarToCache, config); } catch (Throwable t) { } // (this is fine, will already be on the classpath .. otherwise lots of other stuff will be failing all over the place!) } if (job.inputCollection.equals("records")) { InfiniteElasticsearchHadoopUtils.handleElasticsearchInput(job, config, advancedConfigurationDbo); //(won't run under 0.19 so running with "records" should cause all sorts of exceptions) } //TESTED (by hand) if (bTestMode || bLocalMode) { // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec"); } // Manually specified caches List<URL> localJarCaches = InfiniteHadoopUtils.handleCacheList(advancedConfigurationDbo.get("$caches"), job, config, props_custom); Job hj = new Job(config); // (NOTE: from here, changes to config are ignored) try { if (null != localJarCaches) { if (bLocalMode || bTestMode) { Method method = URLClassLoader.class.getDeclaredMethod("addURL", new Class[] { URL.class }); method.setAccessible(true); method.invoke(child, localJarCaches.toArray()); } //TOTEST (tested logically) } Class<?> classToLoad = Class.forName(job.mapper, true, child); hj.setJarByClass(classToLoad); if (job.inputCollection.equalsIgnoreCase("filesystem")) { String inputPath = null; try { inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url"); if (!inputPath.endsWith("/")) { inputPath = inputPath + "/"; } } catch (Exception e) { } if (null == inputPath) { throw new RuntimeException("Must specify 'file.url' if reading from filesystem."); } inputPath = InfiniteHadoopUtils.authenticateInputDirectory(job, inputPath); InfiniteFileInputFormat.addInputPath(hj, new Path(inputPath + "*/*")); // (that extra bit makes it recursive) InfiniteFileInputFormat.setMaxInputSplitSize(hj, 33554432); // (32MB) InfiniteFileInputFormat.setInfiniteInputPathFilter(hj, config); hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteFileInputFormat", true, child)); } else if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) { String[] oidStrs = null; try { String inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url"); Pattern oidExtractor = Pattern.compile("inf://share/([^/]+)"); Matcher m = oidExtractor.matcher(inputPath); if (m.find()) { oidStrs = m.group(1).split("\\s*,\\s*"); } else { throw new RuntimeException( "file.url must be in format inf://share/<oid-list>/<string>: " + inputPath); } InfiniteHadoopUtils.authenticateShareList(job, oidStrs); } catch (Exception e) { throw new RuntimeException( "Authentication error: " + e.getMessage() + ": " + advancedConfigurationDbo, e); } hj.getConfiguration().setStrings("mapred.input.dir", oidStrs); hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteShareInputFormat", true, child)); } else if (job.inputCollection.equals("records")) { hj.setInputFormatClass((Class<? extends InputFormat>) Class .forName("com.ikanow.infinit.e.data_model.custom.InfiniteEsInputFormat", true, child)); } else { if (esMode) { hj.setInputFormatClass((Class<? 
extends InputFormat>) Class.forName( "com.ikanow.infinit.e.processing.custom.utils.InfiniteElasticsearchMongoInputFormat", true, child)); } else { hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, child)); } } if ((null != job.exportToHdfs) && job.exportToHdfs) { //TODO (INF-2469): Also, if the output key is BSON then also run as text (but output as JSON?) Path outPath = InfiniteHadoopUtils.ensureOutputDirectory(job, props_custom); if ((null != job.outputKey) && (null != job.outputValue) && job.outputKey.equalsIgnoreCase("org.apache.hadoop.io.text") && job.outputValue.equalsIgnoreCase("org.apache.hadoop.io.text")) { // (slight hack before I sort out the horrendous job class - if key/val both text and exporting to HDFS then output as Text) hj.setOutputFormatClass((Class<? extends OutputFormat>) Class .forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat", true, child)); TextOutputFormat.setOutputPath(hj, outPath); } //TESTED else { hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName( "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", true, child)); SequenceFileOutputFormat.setOutputPath(hj, outPath); } //TESTED } else { // normal case, stays in MongoDB hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat", true, child)); } hj.setMapperClass((Class<? extends Mapper>) mapperClazz); String mapperOutputKeyOverride = advancedConfigurationDbo.getString("$mapper_key_class", null); if (null != mapperOutputKeyOverride) { hj.setMapOutputKeyClass(Class.forName(mapperOutputKeyOverride)); } //TESTED String mapperOutputValueOverride = advancedConfigurationDbo.getString("$mapper_value_class", null); if (null != mapperOutputValueOverride) { hj.setMapOutputValueClass(Class.forName(mapperOutputValueOverride)); } //TESTED if ((null != job.reducer) && !job.reducer.startsWith("#") && !job.reducer.equalsIgnoreCase("null") && !job.reducer.equalsIgnoreCase("none")) { hj.setReducerClass((Class<? extends Reducer>) Class.forName(job.reducer, true, child)); // Variable reducers: if (null != job.query) { try { hj.setNumReduceTasks(advancedConfigurationDbo.getInt("$reducers", 1)); } catch (Exception e) { try { // (just check it's not a string that is a valid int) hj.setNumReduceTasks( Integer.parseInt(advancedConfigurationDbo.getString("$reducers", "1"))); } catch (Exception e2) { } } } //TESTED } else { hj.setNumReduceTasks(0); } if ((null != job.combiner) && !job.combiner.startsWith("#") && !job.combiner.equalsIgnoreCase("null") && !job.combiner.equalsIgnoreCase("none")) { hj.setCombinerClass((Class<? extends Reducer>) Class.forName(job.combiner, true, child)); } hj.setOutputKeyClass(Class.forName(job.outputKey, true, child)); hj.setOutputValueClass(Class.forName(job.outputValue, true, child)); hj.setJobName(job.jobtitle); currJobName = job.jobtitle; } catch (Error e) { // (messing about with class loaders = lots of chances for errors!) 
throw new RuntimeException(e.getMessage(), e); } if (bTestMode || bLocalMode) { hj.submit(); currThreadId = null; Logger.getRootLogger().addAppender(this); currLocalJobId = hj.getJobID().toString(); currLocalJobErrs.setLength(0); while (!hj.isComplete()) { Thread.sleep(1000); } Logger.getRootLogger().removeAppender(this); if (hj.isSuccessful()) { if (this.currLocalJobErrs.length() > 0) { return "local_done: " + this.currLocalJobErrs.toString(); } else { return "local_done"; } } else { return "Error: " + this.currLocalJobErrs.toString(); } } else { hj.submit(); String jobId = hj.getJobID().toString(); return jobId; } } catch (Exception e) { e.printStackTrace(); Thread.currentThread().setContextClassLoader(savedClassLoader); return "Error: " + InfiniteHadoopUtils.createExceptionMessage(e); } finally { Thread.currentThread().setContextClassLoader(savedClassLoader); } }
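The launcher above treats the job's query string as an "advanced configuration" object and uses containsField to detect optional flags such as qt. A defensive-parsing sketch of that idea, with the flag name kept and everything else simplified and hypothetical:

import com.mongodb.BasicDBObject;

public class AdvancedConfig {
    static boolean isElasticsearchMode(String queryJson, boolean isCustomTable) {
        BasicDBObject advancedConfig;
        try {
            advancedConfig = (queryJson != null)
                    ? (BasicDBObject) com.mongodb.util.JSON.parse(queryJson)
                    : new BasicDBObject();
        } catch (Exception e) {
            advancedConfig = new BasicDBObject(); // unparseable query: fall back to defaults
        }
        return advancedConfig.containsField("qt") && !isCustomTable;
    }
}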
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License:Open Source License
private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable, String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer, String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue, String arguments, Boolean incrementalMode, ObjectId userId, Boolean selfMerge, String originalOutputCollection, Boolean appendResults) throws IOException { String dbserver = prop_general.getDatabaseServer(); output = outputDatabase + "." + tempOutputCollection; boolean isAdmin = AuthUtils.isAdmin(userId); int nSplits = 8; int nDocsPerSplit = 12500; //add communities to query if this is not a custom table BasicDBObject oldQueryObj = null; BasicDBObject srcTags = null;//from w w w. j a v a 2 s . c om // Start with the old query: if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else { oldQueryObj = new BasicDBObject(); } boolean elasticsearchQuery = oldQueryObj.containsField("qt") && !isCustomTable; int nLimit = 0; if (oldQueryObj.containsField("$limit")) { nLimit = oldQueryObj.getInt("$limit"); oldQueryObj.remove("$limit"); } if (oldQueryObj.containsField("$splits")) { nSplits = oldQueryObj.getInt("$splits"); oldQueryObj.remove("$splits"); } if (oldQueryObj.containsField("$srctags")) { srcTags = new BasicDBObject(SourcePojo.tags_, oldQueryObj.get("$srctags")); oldQueryObj.remove("$srctags"); } if (bLocalMode) { // If in local mode, then set this to a large number so we always run inside our limit/split version // (since for some reason MongoInputFormat seems to fail on large collections) nSplits = InfiniteMongoSplitter.MAX_SPLITS; } if (oldQueryObj.containsField("$docsPerSplit")) { nDocsPerSplit = oldQueryObj.getInt("$docsPerSplit"); oldQueryObj.remove("$docsPerSplit"); } oldQueryObj.remove("$fields"); oldQueryObj.remove("$output"); oldQueryObj.remove("$reducers"); String mapperKeyClass = oldQueryObj.getString("$mapper_key_class", ""); String mapperValueClass = oldQueryObj.getString("$mapper_value_class", ""); oldQueryObj.remove("$mapper_key_class"); oldQueryObj.remove("$mapper_value_class"); String cacheList = null; Object cacheObj = oldQueryObj.get("$caches"); if (null != cacheObj) { cacheList = cacheObj.toString(); // (either array of strings, or single string) if (!cacheList.startsWith("[")) { cacheList = "[" + cacheList + "]"; // ("must" now be valid array) } oldQueryObj.remove("$caches"); } //TESTED if (null != nDebugLimit) { // (debug mode override) nLimit = nDebugLimit; } boolean tmpIncMode = (null != incrementalMode) && incrementalMode; Date fromOverride = null; Date toOverride = null; Object fromOverrideObj = oldQueryObj.remove("$tmin"); Object toOverrideObj = oldQueryObj.remove("$tmax"); if (null != fromOverrideObj) { fromOverride = InfiniteHadoopUtils.dateStringFromObject(fromOverrideObj, true); } if (null != toOverrideObj) { toOverride = InfiniteHadoopUtils.dateStringFromObject(toOverrideObj, false); } if (!isCustomTable) { if (elasticsearchQuery) { oldQueryObj.put("communityIds", communityIds); //tmin/tmax not supported - already have that capability as part of the query } else { if (input.equals("feature.temporal")) { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("value.maxTime", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, true)); } //TESTED oldQueryObj.put("_id.c", new BasicDBObject(DbManager.in_, communityIds)); } else { oldQueryObj.put(DocumentPojo.communityId_, new 
BasicDBObject(DbManager.in_, communityIds)); if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED if (input.equals("doc_metadata.metadata")) { oldQueryObj.put(DocumentPojo.index_, new BasicDBObject(DbManager.ne_, "?DEL?")); // (ensures not soft-deleted) } } } } else { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED //get the custom table (and database) input = CustomOutputManager.getCustomDbAndCollection(input); } query = oldQueryObj.toString(); if (arguments == null) arguments = ""; // Generic configuration out.write("<?xml version=\"1.0\"?>\n<configuration>"); // Mongo specific configuration out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title + "</value></property>" + "\n\t<property><!-- run the job verbosely ? --><name>mongo.job.verbose</name><value>true</value></property>" + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>" + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://" + dbserver + "/" + input + "</value></property>" + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://" + dbserver + "/" + output + "</value> </property>" + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>" + StringEscapeUtils.escapeXml(query) + "</value></property>" + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>" + ((fields == null) ? ("") : fields) + "</value></property>" + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>" + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>" + nLimit + "</value><!-- 0 == no limit --></property>" + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? 
--><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>" + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper + "</value></property>" + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer + "</value></property>" + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>" + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat</value></property>" + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>" + outputKey + "</value></property>" + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>" + outputValue + "</value></property>" + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value>" + mapperKeyClass + "</value></property>" + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value>" + mapperValueClass + "</value></property>" + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>" + combiner + "</value></property>" + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>" + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>" + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>"); // Infinit.e specific configuration out.write("\n\t<property><!-- User Arguments [optional] --><name>infinit.e.userid</name><value>" + StringEscapeUtils.escapeXml(userId.toString()) + "</value></property>" + "\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>" + StringEscapeUtils.escapeXml(arguments) + "</value></property>" + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>" + nSplits + "</value></property>" + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>" + nDocsPerSplit + "</value></property>" + "\n\t<property><!-- Infinit.e incremental mode [optional] --><name>update.incremental</name><value>" + tmpIncMode + "</value></property>" + "\n\t<property><!-- Infinit.e quick admin check [optional] --><name>infinit.e.is.admin</name><value>" + isAdmin + "</value></property>" + "\n\t<property><!-- Infinit.e userid [optional] --><name>infinit.e.userid</name><value>" + userId + "</value></property>"); if (null != cacheList) { out.write( "\n\t<property><!-- Infinit.e cache list [optional] --><name>infinit.e.cache.list</name><value>" + cacheList + "</value></property>"); } //TESTED if (null != srcTags) { out.write( "\n\t<property><!-- Infinit.e src tags filter [optional] --><name>infinit.e.source.tags.filter</name><value>" + srcTags.toString() + "</value></property>"); } if (null != selfMerge && selfMerge && originalOutputCollection != null) { originalOutputCollection = "mongodb://" + dbserver + "/" + outputDatabase + "." 
+ originalOutputCollection; out.write( "\n\t<property><!-- This jobs output collection for passing into the mapper along with input collection [optional] --><name>infinit.e.selfMerge</name><value>" + originalOutputCollection + "</value></property>"); } // Closing thoughts: out.write("\n</configuration>"); out.flush(); out.close(); }
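The handling of $limit, $splits, $docsPerSplit and similar directives above boils down to one recurring idiom: check with containsField, read the value, then remove the key so it does not leak into the real MongoDB query. A standalone sketch of that idiom (defaults and field values are illustrative):

import com.mongodb.BasicDBObject;

public class DirectiveStripper {
    // Pull an integer directive out of the query, or return the default if absent.
    static int takeIntDirective(BasicDBObject query, String key, int defaultValue) {
        if (query.containsField(key)) {
            int value = query.getInt(key);
            query.remove(key); // strip it so only genuine query clauses remain
            return value;
        }
        return defaultValue;
    }

    public static void main(String[] args) {
        BasicDBObject query = new BasicDBObject("$limit", 100).append("sourceKey", "src.a");
        int nLimit = takeIntDirective(query, "$limit", 0);
        int nSplits = takeIntDirective(query, "$splits", 8);
        System.out.println(nLimit + " / " + nSplits + " / " + query); // 100, 8, and a query left with only sourceKey
    }
}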
From source file:com.ikanow.infinit.e.utility.MongoAssociationFeatureTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) { ElasticSearchManager elasticManager = null; // Initialize the DB: DBCollection eventFeatureDB = DbManager.getFeature().getAssociation(); // Initialize the ES (create the index if it doesn't already): // 1. Set-up the entity feature index ElasticSearchManager.setDefaultClusterName("infinite-aws"); // (delete the index) //elasticManager = ElasticSearchManager.getIndex("association_index"); //elasticManager.deleteMe(); // Create the index if necessary String sMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(), AssociationFeaturePojoIndexMap.Mapping.class); Builder localSettings = ImmutableSettings.settingsBuilder(); localSettings.put("number_of_shards", 1).put("number_of_replicas", 0); localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); elasticManager = ElasticSearchManager.createIndex("association_index", null, false, null, sMapping, localSettings);/*from w w w.ja v a 2s. c om*/ // Get the index (necessary if already created) if (null == elasticManager) { elasticManager = ElasticSearchManager.getIndex("association_index"); } // Now query the DB: DBCursor dbc = null; dbc = eventFeatureDB.find(query); if (null != chunk) { if (chunk.containsField(DbManager.min_)) { dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_)); } if (chunk.containsField(DbManager.max_)) { dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_)); } } dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000); if (null == chunk) { int nCount = dbc.count() - nSkip; if (nCount < 0) nCount = 0; System.out.println( "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit)); if (0 == nCount) { // Nothing to do... return; } } List<AssociationFeaturePojo> events = new LinkedList<AssociationFeaturePojo>(); int nSynced = 0; // Loop over array and invoke the cleansing function for each one while (dbc.hasNext()) { BasicDBObject dbo = (BasicDBObject) dbc.next(); AssociationFeaturePojo evt = AssociationFeaturePojo.fromDb(dbo, AssociationFeaturePojo.class); // If this table has just been rebuilt from the document then the indexes are all wrong ... 
// recalculate and save if ('#' == evt.getIndex().charAt(0)) { AssociationPojo singleEvt = new AssociationPojo(); singleEvt.setEntity1_index(evt.getEntity1_index()); singleEvt.setEntity2_index(evt.getEntity2_index()); singleEvt.setVerb_category(evt.getVerb_category()); singleEvt.setGeo_index(evt.getGeo_index()); evt.setIndex(AssociationAggregationUtils.getEventFeatureIndex(singleEvt)); eventFeatureDB .update(new BasicDBObject("_id", dbo.get("_id")), new BasicDBObject(MongoDbManager.set_, new BasicDBObject(AssociationFeaturePojo.index_, evt.getIndex())), false, true); // (has to be a multi-update even though it's unique because it's sharded on index) } // Handle groups (system group is: "4c927585d591d31d7b37097a") if (null == evt.getCommunityId()) { evt.setCommunityId(new ObjectId("4c927585d591d31d7b37097a")); } // Bulk add prep events.add(evt); nSynced++; if (events.size() > 1000) { elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null, true); events.clear(); } } // End loop over entities //write whatevers left elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null, true); if (null != chunk) { System.out.println("Found " + nSynced + " records to sync in chunk"); } }
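The chunk handling above (and in the two transfer utilities that follow) uses containsField to apply $min/$max bounds only when the chunk descriptor actually carries them. A reduced sketch, assuming the DbManager.min_/max_ constants in the examples resolve to the driver's "$min"/"$max" specials:

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;

public class ChunkScan {
    // Assumes the chunk descriptor carries its bounds under "$min"/"$max".
    static DBCursor boundedCursor(DBCollection coll, BasicDBObject query, BasicDBObject chunk,
                                  int nSkip, int nLimit) {
        DBCursor dbc = coll.find(query);
        if (chunk != null) {
            if (chunk.containsField("$min")) {
                dbc = dbc.addSpecial("$min", chunk.get("$min"));
            }
            if (chunk.containsField("$max")) {
                dbc = dbc.addSpecial("$max", chunk.get("$max"));
            }
        }
        return dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    }
}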
From source file:com.ikanow.infinit.e.utility.MongoDocumentTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, boolean bAggregate, BasicDBObject chunk) throws IOException { PropertiesManager pm = new PropertiesManager(); int nMaxContentSize_bytes = pm.getMaxContentSize(); // Initialize the DB: DBCollection docsDB = DbManager.getDocument().getMetadata(); DBCollection contentDB = DbManager.getDocument().getContent(); DBCollection sourcesDB = DbManager.getIngest().getSource(); ElasticSearchManager.setDefaultClusterName("infinite-aws"); // 1. Get the documents from the DB (combining data + metadata and refreshing source meta) // (Ignore soft-deleted records:) if (null == query) { query = new BasicDBObject(); }//from ww w . j a va 2 s .c om Object sourceKeyQueryTerm = query.remove(DocumentPojo.sourceKey_); if (null != sourceKeyQueryTerm) { if (query.toString() .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) { throw new RuntimeException( "Can't specify sourceKey as part of complex query term: " + query.toString()); } //TESTED (by hand, "{ \"sourceKey\": \"x\", \"$or\": [ { \"sourceKey\": \"x\" } ] }") if (sourceKeyQueryTerm instanceof String) { query.put(DocumentPojo.sourceKey_, SourcePojo.getDistributedKeyQueryTerm((String) sourceKeyQueryTerm)); } //TESTED (by hand, "{\"sourceKey\": \"feeds.arstechnica.com.arstechnica.index.11.2.\" }") else if (sourceKeyQueryTerm instanceof DBObject) { // find all the _sources_ matching this term, and convert to a big list including distribution BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1); fields.put(SourcePojo.highestDistributionFactorStored_, 1); DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.key_, sourceKeyQueryTerm), fields); LinkedList<String> sourceKeys = new LinkedList<String>(); for (DBObject dbo : dbc) { String key = (String) dbo.get(SourcePojo.key_); Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_); Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key, distributionFactor); sourceKeys.addAll(sourceKeysForSource); } query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys)); } //TESTED (by hand, "{\"sourceKey\": { \"$gt\": \"dev.ikanow\" } }") else { throw new RuntimeException("Can't specify sourceKey as part of complex query term"); } //(actually not possible, just included here for mathematical completeness...) } else { if (query.toString() .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) { throw new RuntimeException("Can't specify sourceKey as part of complex query term"); } //TESTE (by hand, "{ \"$or\": [ { \"sourceKey\": \"x\" } ] }") // Optimize communityId into sourceKeys... 
if (null != query.get(DocumentPojo.communityId_)) { try { ObjectId commId = query.getObjectId(DocumentPojo.communityId_); BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1); fields.put(SourcePojo.highestDistributionFactorStored_, 1); DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.communityIds_, commId), fields); LinkedList<String> sourceKeys = new LinkedList<String>(); int added = 0; for (DBObject dbo : dbc) { String key = (String) dbo.get(SourcePojo.key_); Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_); Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key, distributionFactor); sourceKeys.addAll(sourceKeysForSource); added += sourceKeysForSource.size(); } query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys)); System.out.println("(Optimized simple community query to " + added + " source key(s))"); } catch (Exception e) { //DEBUG //e.printStackTrace(); System.out.println("(Can't optimize complex community query: " + e.getMessage()); } } //TESTED (by hand - including distributed source version) } // Ignored delete objects Object urlQuery = query.get(DocumentPojo.url_); if (null == urlQuery) { query.put(DocumentPojo.url_, Pattern.compile("^[^?]")); // (ie nothing starting with ?) } //TESTED else if (urlQuery instanceof BasicDBObject) { ((BasicDBObject) urlQuery).append("$regex", "^[^?]"); } //TESTED //DEBUG //System.out.println("COMBINED QUERY= " + query.toString()); // If aggregating, kick off the background aggregation thread if (bAggregate) { EntityBackgroundAggregationManager.startThread(); AssociationBackgroundAggregationManager.startThread(); } //Debug: DBCursor dbc = null; dbc = docsDB.find(query); if (null != chunk) { if (chunk.containsField(DbManager.min_)) { dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_)); } if (chunk.containsField(DbManager.max_)) { dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_)); } } dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000); if (null == chunk) { int nCount = dbc.count() - nSkip; if (nCount < 0) nCount = 0; System.out.println( "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit)); if (0 == nCount) { // Nothing to do... return; } } byte[] storageArray = new byte[200000]; int nSynced = 0; LinkedList<DocumentPojo> docsToTransfer = new LinkedList<DocumentPojo>(); Map<ObjectId, LinkedList<DocumentPojo>> communityList = null; ObjectId currCommunityId = null; while (dbc.hasNext()) { BasicDBObject dbo = (BasicDBObject) dbc.next(); DocumentPojo doc = DocumentPojo.fromDb(dbo, DocumentPojo.class); String sDocIndex = doc.getIndex(); if (null == sDocIndex) { sDocIndex = "document_index"; } if ((null != _deletedIndex) && !_deletedIndex.contains(sDocIndex)) { _deletedIndex.add(sDocIndex); rebuildIndex(sDocIndex); try { // (Just in case the index requires some time to sort itself out) Thread.sleep(1000); } catch (InterruptedException e) { } } //Debug: //System.out.println("Getting content..." 
+ feed.getTitle() + " / " + feed.getUrl()); // Get the content: if ((0 != nMaxContentSize_bytes) && StoreAndIndexManager.docHasExternalContent(doc.getUrl(), doc.getSourceUrl())) { BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, doc.getUrl()); contentQ.put(CompressedFullTextPojo.sourceKey_, new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, doc.getSourceKey()))); BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1); fields.put(CompressedFullTextPojo.sourceKey_, 1); DBCursor dbcGzip = contentDB.find(contentQ, fields); while (dbcGzip.hasNext()) { BasicDBObject dboContent = (BasicDBObject) dbcGzip.next(); if (!dboContent.containsField(CompressedFullTextPojo.sourceKey_)) { // If this has another version then ignore this one... if (dbc.hasNext()) { continue; } //TESTED (by hand) } byte[] compressedData = ((byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_)); ByteArrayInputStream in = new ByteArrayInputStream(compressedData); GZIPInputStream gzip = new GZIPInputStream(in); int nRead = 0; StringBuffer output = new StringBuffer(); while (nRead >= 0) { nRead = gzip.read(storageArray, 0, 200000); if (nRead > 0) { String s = new String(storageArray, 0, nRead, "UTF-8"); output.append(s); } } doc.setFullText(output.toString()); } } // (else document has full text already) // Get tags, if necessary: // Always overwrite tags - one of the reasons we might choose to migrate // Also may need source in order to support source index filtering SourcePojo src = _sourceCache.get(doc.getSourceKey()); if (null == src) { //TODO (INF-2265): handle search index settings in pipeline mode... (also didn't seem to work?) BasicDBObject srcDbo = (BasicDBObject) sourcesDB .findOne(new BasicDBObject(SourcePojo.key_, doc.getSourceKey())); if (null != srcDbo) { src = SourcePojo.fromDb(srcDbo, SourcePojo.class); if (null != src.getProcessingPipeline()) { try { // Set the index settings HarvestController hc = new HarvestController(); HarvestControllerPipeline hcPipe = new HarvestControllerPipeline(); hcPipe.extractSource_preProcessingPipeline(src, hc); } catch (Exception e) { //DEBUG e.printStackTrace(); } } //TESTED (by hand) _sourceCache.put(doc.getSourceKey(), src); } } doc.setTempSource(src); // (needed for source index filtering) if (null != src) { if (null != src.getTags()) { Set<String> tagsTidied = new TreeSet<String>(); for (String s : src.getTags()) { String ss = s.trim().toLowerCase(); tagsTidied.add(ss); } // May also want to write this back to the DB: //TODO (INF-2223): Handle append tags or not in the pipeline... if ((null == src.getAppendTagsToDocs()) || src.getAppendTagsToDocs()) { if ((null == doc.getTags()) || (doc.getTags().size() < tagsTidied.size())) { BasicDBObject updateQuery = new BasicDBObject(DocumentPojo.sourceKey_, doc.getRawSourceKey()); // (ie including the # if there is one) updateQuery.put(DocumentPojo._id_, doc.getId()); docsDB.update(updateQuery, new BasicDBObject(DbManager.addToSet_, new BasicDBObject(DocumentPojo.tags_, new BasicDBObject(DbManager.each_, tagsTidied)))); } doc.setTags(tagsTidied); // (just copy ptr across) } } } // 2. 
Update the index with the new document // (Optionally also update entity and assoc features) if (bAggregate) { if (null == currCommunityId) { currCommunityId = doc.getCommunityId(); } else if (!currCommunityId.equals(doc.getCommunityId())) { LinkedList<DocumentPojo> perCommunityDocList = null; if (null == communityList) { // (very first time we see > 1 community) communityList = new TreeMap<ObjectId, LinkedList<DocumentPojo>>(); perCommunityDocList = new LinkedList<DocumentPojo>(); perCommunityDocList.addAll(docsToTransfer); //(NOT including doc, this hasn't been added to docsToTransfer yet) communityList.put(currCommunityId, perCommunityDocList); } currCommunityId = doc.getCommunityId(); perCommunityDocList = communityList.get(currCommunityId); if (null == perCommunityDocList) { perCommunityDocList = new LinkedList<DocumentPojo>(); communityList.put(currCommunityId, perCommunityDocList); } perCommunityDocList.add(doc); } } //TESTED nSynced++; docsToTransfer.add(doc); if (0 == (nSynced % 10000)) { StoreAndIndexManager manager = new StoreAndIndexManager(); if (bAggregate) { // Loop over communities and aggregate each one then store the modified entities/assocs doAggregation(communityList, docsToTransfer); communityList = null; // (in case the next 10,000 docs are all in the same community!) currCommunityId = null; } //TOTEST manager.addToSearch(docsToTransfer); docsToTransfer.clear(); System.out.println("(Synced " + nSynced + " records)"); } } // (End loop over docs) // Sync remaining docs if (!docsToTransfer.isEmpty()) { if (bAggregate) { // Loop over communities and aggregate each one then store the modified entities/assocs doAggregation(communityList, docsToTransfer); } StoreAndIndexManager manager = new StoreAndIndexManager(); manager.addToSearch(docsToTransfer); } if (null != chunk) { System.out.println("Found " + nSynced + " records to sync in chunk"); } if (bAggregate) { System.out.println("Completed. You can hit CTRL+C at any time."); System.out.println( "By default it will keep running for 5 minutes while the background aggregation runs to update the documents' entities."); try { Thread.sleep(300000); } catch (InterruptedException e) { } // Turn off so we can exit EntityBackgroundAggregationManager.stopThreadAndWait(); AssociationBackgroundAggregationManager.stopThreadAndWait(); } }
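One narrow containsField use in the transfer above: a compressed-content record that lacks the sourceKey field is treated as a legacy duplicate and skipped when another record follows. A simplified sketch of that check; the literal "sourceKey" string and the helper below are assumptions, and the surrounding loop is condensed:

import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;

public class ContentSelector {
    // Hypothetical helper: skip content records that predate the sourceKey field
    // whenever a later (keyed) record is still available on the cursor.
    static void consumeContent(DBCursor dbcGzip) {
        while (dbcGzip.hasNext()) {
            BasicDBObject dboContent = (BasicDBObject) dbcGzip.next();
            if (!dboContent.containsField("sourceKey") && dbcGzip.hasNext()) {
                continue; // legacy record: prefer the newer version that follows
            }
            // ... decompress dboContent's gzipped content and use it ...
        }
    }
}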
From source file:com.ikanow.infinit.e.utility.MongoEntityFeatureTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) { ElasticSearchManager elasticManager = null; // Initialize the DB: DBCollection entityFeatureDB = DbManager.getFeature().getEntity(); // Initialize the ES (create the index if it doesn't already): // 1. Set-up the entity feature index String indexName = "entity_index"; ElasticSearchManager.setDefaultClusterName("infinite-aws"); // (delete the index) //elasticManager = ElasticSearchManager.getIndex(indexName); //elasticManager.deleteMe(); // Create the index if necessary String sMapping = new Gson().toJson(new EntityFeaturePojoIndexMap.Mapping(), EntityFeaturePojoIndexMap.Mapping.class); Builder localSettings = ImmutableSettings.settingsBuilder(); localSettings.put("number_of_shards", 1).put("number_of_replicas", 0); localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); elasticManager = ElasticSearchManager.createIndex(indexName, null, false, null, sMapping, localSettings); // Get the index (necessary if already created) if (null == elasticManager) { elasticManager = ElasticSearchManager.getIndex(indexName); }//w ww . j a v a 2 s. co m // Now query the DB: DBCursor dbc = null; dbc = entityFeatureDB.find(query); if (null != chunk) { if (chunk.containsField(DbManager.min_)) { dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_)); } if (chunk.containsField(DbManager.max_)) { dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_)); } } dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000); if (null == chunk) { int nCount = dbc.count() - nSkip; if (nCount < 0) nCount = 0; System.out.println( "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit)); if (0 == nCount) { // Nothing to do... return; } } int nSynced = 0; List<EntityFeaturePojo> entities = new ArrayList<EntityFeaturePojo>(); while (dbc.hasNext()) { EntityFeaturePojo feature = EntityFeaturePojo.fromDb(dbc.next(), EntityFeaturePojo.class); if (null != feature.getAlias()) { // (some corrupt gazateer entry) // Handle groups (system group is: "4c927585d591d31d7b37097a") // if there is no community id, add system group (something is wrong if this happens?) if (null == feature.getCommunityId()) { feature.setCommunityId(new ObjectId("4c927585d591d31d7b37097a")); } } entities.add(feature); nSynced++; // Add the entities if (entities.size() > 1000) { elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(entities, EntityFeaturePojo.listType(), new EntityFeaturePojoIndexMap()), "_id", null, true); // (note EntityFeaturePojoIndexMap creates an "_id" field of the format index:community) entities = new ArrayList<EntityFeaturePojo>(); } } //write whatevers left elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(entities, EntityFeaturePojo.listType(), new EntityFeaturePojoIndexMap()), "_id", null, true); // (note EntityFeaturePojoIndexMap creates an "_id" field of the format index:community) if (null != chunk) { System.out.println("Found " + nSynced + " records to sync in chunk"); } }
From source file:com.images3.data.impl.MongoDBObjectMapper.java
License:Apache License
public ImageMetricsOS mapToImageMetricsOS(BasicDBObject source) {
    Map<ImageMetricsType, Long> numbers = new HashMap<ImageMetricsType, Long>();
    numbers.put(ImageMetricsType.COUNTS_INBOUND,
            (source.containsField(ImageMetricsType.COUNTS_INBOUND.toString())
                    ? source.getLong(ImageMetricsType.COUNTS_INBOUND.toString())
                    : 0L));
    numbers.put(ImageMetricsType.COUNTS_OUTBOUND,
            (source.containsField(ImageMetricsType.COUNTS_OUTBOUND.toString())
                    ? source.getLong(ImageMetricsType.COUNTS_OUTBOUND.toString())
                    : 0L));
    numbers.put(ImageMetricsType.SIZE_INBOUND,
            (source.containsField(ImageMetricsType.SIZE_INBOUND.toString())
                    ? source.getLong(ImageMetricsType.SIZE_INBOUND.toString())
                    : 0L));
    numbers.put(ImageMetricsType.SIZE_OUTBOUND,
            (source.containsField(ImageMetricsType.SIZE_OUTBOUND.toString())
                    ? source.getLong(ImageMetricsType.SIZE_OUTBOUND.toString())
                    : 0L));
    return new ImageMetricsOS(source.getString("imagePlantId"), source.getString("templateName"),
            source.getLong("second"), numbers);
}
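This mapper defaults each metric to 0L when the field is absent, because BasicDBObject.getLong(String) dereferences the stored value and throws a NullPointerException for a missing key. The repeated ternary can be factored into a small helper; a sketch with hypothetical field names:

import com.mongodb.BasicDBObject;

public class MetricsDefaultSketch {

    // BasicDBObject.getLong(key) NPEs on a missing key, so guard with containsField first.
    static long getLongOrDefault(BasicDBObject source, String field, long fallback) {
        return source.containsField(field) ? source.getLong(field) : fallback;
    }

    public static void main(String[] args) {
        // "countsInbound" / "sizeOutbound" are hypothetical field names for illustration.
        BasicDBObject source = new BasicDBObject("countsInbound", 42L);
        System.out.println(getLongOrDefault(source, "countsInbound", 0L)); // 42
        System.out.println(getLongOrDefault(source, "sizeOutbound", 0L));  // 0
    }
}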
From source file:com.impetus.client.mongodb.query.MongoDBQuery.java
License:Apache License
/**
 * Creates MongoDB Query object from filterClauseQueue.
 * 
 * @param m
 *            the m
 * @param filterClauseQueue
 *            the filter clause queue
 * @return the basic db object
 */
public BasicDBObject createSubMongoQuery(EntityMetadata m, Queue filterClauseQueue) {
    BasicDBObject query = new BasicDBObject();
    BasicDBObject compositeColumns = new BasicDBObject();

    MetamodelImpl metaModel = (MetamodelImpl) kunderaMetadata.getApplicationMetadata()
            .getMetamodel(m.getPersistenceUnit());

    AbstractManagedType managedType = (AbstractManagedType) metaModel.entity(m.getEntityClazz());

    for (Object object : filterClauseQueue) {
        boolean isCompositeColumn = false;

        boolean isSubCondition = false;

        if (object instanceof FilterClause) {
            FilterClause filter = (FilterClause) object;
            String property = filter.getProperty();
            String condition = filter.getCondition();
            Object value = filter.getValue().get(0);

            // value is string but field.getType is different, then get
            // value using
            Field f = null;

            // if alias is still present .. means it is an enclosing
            // document search.
            if (managedType.hasLobAttribute()) {
                EntityType entity = metaModel.entity(m.getEntityClazz());
                String fieldName = m.getFieldName(property);

                f = (Field) entity.getAttribute(fieldName).getJavaMember();

                if (value.getClass().isAssignableFrom(String.class) && f != null
                        && !f.getType().equals(value.getClass())) {
                    value = PropertyAccessorFactory.getPropertyAccessor(f).fromString(f.getType().getClass(),
                            value.toString());
                }
                value = MongoDBUtils.populateValue(value, value.getClass());

                property = "metadata." + property;
            } else {
                if (((AbstractAttribute) m.getIdAttribute()).getJPAColumnName().equalsIgnoreCase(property)) {
                    property = "_id";
                    f = (Field) m.getIdAttribute().getJavaMember();
                    if (metaModel.isEmbeddable(m.getIdAttribute().getBindableJavaType())
                            && value.getClass().isAssignableFrom(f.getType())) {
                        EmbeddableType compoundKey = metaModel
                                .embeddable(m.getIdAttribute().getBindableJavaType());
                        compositeColumns = MongoDBUtils.getCompoundKeyColumns(m, value, compoundKey);
                        isCompositeColumn = true;
                        continue;
                    }
                } else if (metaModel.isEmbeddable(m.getIdAttribute().getBindableJavaType())
                        && StringUtils.contains(property, '.')) {
                    // Means it is a case of composite column.
                    property = property.substring(property.indexOf(".") + 1);
                    isCompositeColumn = true;
                }
                /*
                 * if a composite key. "." assuming "." is part of
                 * property in case of embeddable only
                 */
                else if (StringUtils.contains(property, '.')) {
                    EntityType entity = metaModel.entity(m.getEntityClazz());
                    StringTokenizer tokenizer = new StringTokenizer(property, ".");

                    String embeddedAttributeAsStr = tokenizer.nextToken();

                    String embeddableAttributeAsStr = tokenizer.nextToken();

                    Attribute embeddedAttribute = entity.getAttribute(embeddedAttributeAsStr);

                    EmbeddableType embeddableEntity = metaModel
                            .embeddable(((AbstractAttribute) embeddedAttribute).getBindableJavaType());

                    f = (Field) embeddableEntity.getAttribute(embeddableAttributeAsStr).getJavaMember();

                    property = ((AbstractAttribute) embeddedAttribute).getJPAColumnName() + "."
                            + ((AbstractAttribute) embeddableEntity.getAttribute(embeddableAttributeAsStr))
                                    .getJPAColumnName();
                } else {
                    EntityType entity = metaModel.entity(m.getEntityClazz());
                    String discriminatorColumn = ((AbstractManagedType) entity).getDiscriminatorColumn();

                    if (!property.equals(discriminatorColumn)) {
                        String fieldName = m.getFieldName(property);
                        f = (Field) entity.getAttribute(fieldName).getJavaMember();
                    }
                }

                if (value.getClass().isAssignableFrom(String.class) && f != null
                        && !f.getType().equals(value.getClass())) {
                    value = PropertyAccessorFactory.getPropertyAccessor(f).fromString(f.getType().getClass(),
                            value.toString());
                }
                value = MongoDBUtils.populateValue(value, value.getClass());
            }

            // Property, if doesn't exist in entity, may be there in a
            // document embedded within it, so we have to check that
            // TODO: Query should actually be in a format
            // documentName.embeddedDocumentName.column, remove below if
            // block once this is decided

            // Query could be geospatial in nature
            if (f != null && f.getType().equals(Point.class)) {
                GeospatialQuery geospatialQueryimpl = GeospatialQueryFactory
                        .getGeospatialQueryImplementor(condition, value);
                query = (BasicDBObject) geospatialQueryimpl.createGeospatialQuery(property, value, query);
            } else {
                if (isCompositeColumn) {
                    EmbeddableType embeddableType = metaModel
                            .embeddable(m.getIdAttribute().getBindableJavaType());
                    AbstractAttribute attribute = (AbstractAttribute) embeddableType.getAttribute(property);
                    property = new StringBuffer("_id.").append(attribute.getJPAColumnName()).toString();
                }
                if (condition.equals("=")) {
                    query.append(property, value);
                } else if (condition.equalsIgnoreCase("like")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property, ((BasicDBObject) query.get(property)).append("$regex",
                                createLikeRegex((String) value)));
                    } else {
                        query.append(property, new BasicDBObject("$regex", createLikeRegex((String) value)));
                    }
                } else if (condition.equalsIgnoreCase(">")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property, ((BasicDBObject) query.get(property)).append("$gt", value));
                    } else {
                        query.append(property, new BasicDBObject("$gt", value));
                    }
                } else if (condition.equalsIgnoreCase(">=")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property, ((BasicDBObject) query.get(property)).append("$gte", value));
                    } else {
                        query.append(property, new BasicDBObject("$gte", value));
                    }
                } else if (condition.equalsIgnoreCase("<")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property, ((BasicDBObject) query.get(property)).append("$lt", value));
                    } else {
                        query.append(property, new BasicDBObject("$lt", value));
                    }
                } else if (condition.equalsIgnoreCase("<=")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property, ((BasicDBObject) query.get(property)).append("$lte", value));
                    } else {
                        query.append(property, new BasicDBObject("$lte", value));
                    }
                } else if (condition.equalsIgnoreCase("in")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property,
                                ((BasicDBObject) query.get(property)).append("$in", filter.getValue()));
                    } else {
                        query.append(property, new BasicDBObject("$in", filter.getValue()));
                    }
                } else if (condition.equalsIgnoreCase("not in")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property,
                                ((BasicDBObject) query.get(property)).append("$nin", filter.getValue()));
                    } else {
                        query.append(property, new BasicDBObject("$nin", filter.getValue()));
                    }
                } else if (condition.equalsIgnoreCase("<>")) {
                    if (query.containsField(property)) {
                        query.get(property);
                        query.put(property, ((BasicDBObject) query.get(property)).append("$ne", value));
                    } else {
                        query.append(property, new BasicDBObject("$ne", value));
                    }
                }
            }

            // TODO: Add support for other operators like >, <, >=, <=,
            // order by asc/ desc, limit, skip, count etc
        }
    }
    if (!compositeColumns.isEmpty()) {
        query.append("_id", compositeColumns);
    }

    return query;
}
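Every comparison branch above uses the same idiom: if the property already maps to an operator document, append the new operator to it, otherwise start a fresh { "$op": value } sub-document. A condensed sketch of that merge helper (hypothetical names, not Kundera API):

import com.mongodb.BasicDBObject;

public class OperatorMergeSketch {

    // If the property already holds an operator document, append the new operator to it;
    // otherwise create a new {"<op>": value} sub-document (same containsField check as above).
    static void addOperator(BasicDBObject query, String property, String op, Object value) {
        if (query.containsField(property)) {
            ((BasicDBObject) query.get(property)).append(op, value);
        } else {
            query.append(property, new BasicDBObject(op, value));
        }
    }

    public static void main(String[] args) {
        BasicDBObject query = new BasicDBObject();
        addOperator(query, "age", "$gte", 21);
        addOperator(query, "age", "$lt", 65); // merged into the same sub-document
        System.out.println(query); // roughly: { "age" : { "$gte" : 21 , "$lt" : 65 } }
    }
}

Note the same constraint as the original code: if an earlier "=" clause stored a plain value under the property, the cast to BasicDBObject would fail, so equality and range conditions on one property cannot be mixed this way.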