Example usage for com.mongodb BasicDBObject append

List of usage examples for com.mongodb BasicDBObject append


On this page you can find example usage for com.mongodb BasicDBObject append.


public BasicDBObject append(final String key, final Object val) 

Source Link


Add a key/value pair to this object


From source file:com.ikanow.infinit.e.data_model.store.MongoDbUtil.java

License:Apache License

public static BasicDBObject encode(JsonObject o) {
    BasicDBObject dbo = new BasicDBObject();
    for (Map.Entry<String, JsonElement> elKV : o.entrySet()) {
        dbo.append(elKV.getKey(), encodeUnknown(elKV.getValue()));
    }/*from   w ww  .j a  va 2 s . com*/
    return dbo;

From source file:com.ikanow.infinit.e.harvest.enrichment.custom.GeoReference.java

License:Open Source License

/*
 * getQuery
 * @param g
 * @param hasGeoindex
 * @param attempt
 * @return
 */
//TODO (INF-1864): running this in non-strict mode can cripple the DB since search field might not
//be set ... at least need to cache such queries (almost always the US every time!)....

private static BasicDBObject getQuery(Boolean hasGeoindex, int attempt) {
    BasicDBObject query = new BasicDBObject();

    // SearchField
    String searchField = (geoInfo.getSearch_field() != null) ? geoInfo.getSearch_field().toLowerCase() : null;

    // Cities are all lower case in the georeference collection, set toLowerCase here
    String city = (geoInfo.getCity() != null) ? geoInfo.getCity().toLowerCase() : null;

    // Use WordUtils.capitalize to set first char of region and country words to Upper Case
    String region = (geoInfo.getRegion() != null) ? WordUtils.capitalize(geoInfo.getRegion()) : null;
    String country = (geoInfo.getCountry() != null) ? WordUtils.capitalize(geoInfo.getCountry()) : null;
    String countryCode = geoInfo.getCountry_code();

    // If the only field sent was the search_field
    if ((searchField != null) && (city == null) && (region == null) && (country == null)
            && (countryCode == null)) {
        query.put("search_field", searchField);

    // Otherwise...
    else {
        switch (attempt) {
        case 1:
            // Set the searchField if it is null
            if (searchField == null && city != null)
                searchField = city.toLowerCase();
            if (searchField == null && region != null)
                searchField = region.toLowerCase();
            if (searchField == null && country != null)
                searchField = country.toLowerCase();

            if (searchField != null)
                query.put("search_field", searchField);
            if (city != null)
                query.put("city", city);
            if (region != null)
                query.put("region", region);
            if (country != null)
                query.put("country", country);
            if (null == searchField) { // only country code specified...
                query.put("city", new BasicDBObject(DbManager.exists_, false));
                query.put("region", new BasicDBObject(DbManager.exists_, false));
            if (countryCode != null)
                query.put("country_code", countryCode);

        case 2:
            if (city != null) {
                query.put("search_field", city.toLowerCase());
                query.put("city", city);
            } else if (region != null) {
                query.put("search_field", region.toLowerCase());
                query.put("region", region);
            } else {
                query.put("search_field", country.toLowerCase());

            if (country != null)
                query.put("country", country);
            if (countryCode != null)
                query.put("country_code", countryCode);

        case 3:
            if (searchField == null && region != null)
                searchField = region.toLowerCase();
            if (searchField == null && country != null)
                searchField = country.toLowerCase();

            if (searchField != null)
                query.put("search_field", searchField);
            if (region != null)
                query.put("region", region);
            if (country != null)
                query.put("country", country);
            if (countryCode != null)
                query.put("country_code", countryCode);

            if (country != null)
                query.put("search_field", country.toLowerCase());
            if (country != null)
                query.put("country", country);
            if (countryCode != null)
                query.put("country_code", countryCode);
    if (query.isEmpty()) {
        return null;

    // Only return records with GeoIndex objects
    if (hasGeoindex) {
        BasicDBObject ne = new BasicDBObject();
        ne.append(DbManager.exists_, true);
        query.put("geoindex", ne);

    return query;

From source file:com.ikanow.infinit.e.harvest.enrichment.custom.GeoReference.java

License:Open Source License

/*
 * getNearestCities
 * Get n-cities near a lat/lon pair, results returned ordered by distance from
 * the lat/lon pair
 * @param lat
 * @param lon
 * @param nMaxReturns
 * @return List<GeoFeaturePojo>
 */
public static List<GeoFeaturePojo> getNearestCities(DBCollection geoDb, String lat, String lon,
        int nMaxReturns) {
    try {
        // Create Double[] from lat, lon
        Double[] d = new Double[] { Double.parseDouble(lat), Double.parseDouble(lon) };

        // Build query object to return the shell equivalent of:
        // db.georeference.find({geoindex : {$near : [lat.lon]}})
        BasicDBObject query = new BasicDBObject();
        BasicDBObject near = new BasicDBObject();
        near.append("$near", d);
        query.put("geoindex", near);

        // Perform query
        DBCursor result = geoDb.find(query).limit(nMaxReturns);

        // Convert results to List<GeoReferencePojo>
        List<GeoFeaturePojo> gpl = GeoFeaturePojo.listFromDb(result,
                new TypeToken<ArrayList<GeoFeaturePojo>>() {
        return gpl;
    } catch (Exception e) {
        return null;

From source file:com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi.AlchemyEntityGeoCleanser.java

License:Open Source License

public void doProcessing(int nSkip, BasicDBObject userQuery, int nLimit, boolean bAlterDB)
        throws NumberFormatException, UnknownHostException, MongoException {

    // Initialization (regexes and stuff)

    // Launch MongoDB query

    BasicDBObject query = userQuery;/*from  w ww .  j a  v  a2 s  .  c  o m*/
    if (null == query) {
        new BasicDBObject();

    // Just get the entity list out to save a few CPU cycles
    BasicDBObject outFields = new BasicDBObject();
    outFields.append(DocumentPojo.entities_, 1);
    outFields.append(DocumentPojo.url_, 1); // (help with debugging)
    outFields.append(DocumentPojo.title_, 1); // (help with debugging) 

    DBCursor dbc = null;
    if (nLimit > 0) {
        dbc = _docsDB.find(query, outFields).limit(nLimit).skip(nSkip);
    } else { // Everything!
        dbc = _docsDB.find(query, outFields).skip(nSkip);

    // Create POJO array of documents (definitely not the most efficient, but 
    // will make integration with the harvester easier)

    List<DocumentPojo> docus = DocumentPojo.listFromDb(dbc, DocumentPojo.listType());

    // Loop over array and invoke the cleansing function for each one

    for (DocumentPojo docu : docus) {
        if (this.cleanseGeoInDocu(docu)) {

            if (bAlterDB) {

                BasicDBObject inner0 = new BasicDBObject(DocumentPojo.entities_,
                        (DBObject) com.mongodb.util.JSON.parse(new Gson().toJson(docu.getEntities())));
                BasicDBObject inner1 = new BasicDBObject(MongoDbManager.set_, inner0);

                // Overwrite the existing entities list with the new one 
                _docsDB.update(new BasicDBObject(DocumentPojo._id_, docu.getId()), inner1, false, true);
                // (need the multi-update in case _id isn't the shard key - documentation claims this is not necessary but 2.4.6/shell still enforces it)

            } //TESTED

From source file:com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi.AlchemyEntityGeoCleanser.java

License:Open Source License

/**
 * Re-examines the location ("Where" dimension) entities of a document and, where a
 * disambiguated US place name looks doubtful, replaces it with a better-scoring
 * candidate.
 *
 * <p>Pass 1 walks the entities: disambiguations of the form "A, B" whose second part
 * matches {@code _statesRegex} are looked up in {@code _georefDB} and stored as dubious
 * candidates; other location entities feed the "other regions/countries" reference sets.
 * Pass 2 scores the current disambiguation against each alternative (lower is better)
 * and rewrites the entity's disambiguated name and index when an alternative wins.
 *
 * <p>NOTE(review): several statements in this listing appear to have been lost (see the
 * inline notes); confirm against the upstream file before relying on the details.
 *
 * @param doc the document whose entities are inspected and possibly modified in place
 * @return {@code true} if any entity was changed
 */
public boolean cleanseGeoInDocu(DocumentPojo doc) {

    boolean bChangedAnything = false;

    // Entities whose US disambiguation is doubtful, keyed by entity index
    Map<String, Candidate> dubiousLocations = new HashMap<String, Candidate>();

    // Reference sets consulted during scoring in pass 2
    Set<String> otherRegions = new HashSet<String>();
    Set<String> otherCountries = new HashSet<String>();
    Set<String> otherCountriesOrRegionsReferenced = new HashSet<String>();

    //Debug
    if (_nDebugLevel >= 2) {
        // NOTE(review): the start of this statement is missing (presumably a print call).
        // It also dereferences doc.getEntities() before the null check below.
                "+++++++ Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + doc.getEntities().size());

    // 1] First off, let's find anything location-based and also determine if it's bad or not 

    if (null != doc.getEntities())
        for (EntityPojo ent : doc.getEntities()) {

            boolean bStrongCandidate = false;

            // Locations only (entities in the "Where" dimension)
            if (EntityPojo.Dimension.Where == ent.getDimension()) {

                // So locations get disambiguated to one of:
                // "<city-etc>, <region-or-country>", or "<region-or-country>"
                // though can also just be left as they are.

                String sActualName = ent.getActual_name().toLowerCase();
                if (!ent.getDisambiguatedName().toLowerCase().equals(sActualName)) {
                    // It's been disambiguated

                    if (_nDebugLevel >= 3) {
                        System.out.println("disambiguous candidate: " + ent.getDisambiguatedName() + " VS "
                                + ent.getActual_name() + " ("
                                + ((null != ent.getSemanticLinks()) ? ent.getSemanticLinks().size() : 0) + ")");

                    // OK next step, is it a disambiguation to a US town?
                    String splitMe[] = ent.getDisambiguatedName().split(", ");
                    if (2 == splitMe.length) {

                        String stateOrCountry = splitMe[1];
                        Matcher m = _statesRegex.matcher(stateOrCountry);
                        if (m.find()) { // This is a US disambiguation - high risk case
                            // Short cut if state is already directly mentioned?
                            stateOrCountry = stateOrCountry.toLowerCase();

                            if (!otherRegions.contains(stateOrCountry)) { // See list below - no need to go any further

                                // OK next step - is it a possible ambiguity:
                                // NOTE(review): the second constructor argument below is missing, and
                                // nothing visible adds inner1_0 / inner1_1 to x before it is used.
                                ArrayList<BasicDBObject> x = new ArrayList<BasicDBObject>();
                                BasicDBObject inner0_0 = new BasicDBObject(MongoDbManager.not_,
                                BasicDBObject inner1_0 = new BasicDBObject("country_code", inner0_0);

                                BasicDBObject inner0_1 = new BasicDBObject(MongoDbManager.gte_, 400000);
                                BasicDBObject inner1_1 = new BasicDBObject("population", inner0_1);

                                // Query: search_field equals the actual name AND any clause in x matches
                                BasicDBObject dbo = new BasicDBObject();
                                dbo.append("search_field", sActualName);
                                dbo.append(MongoDbManager.or_, x);

                                DBCursor dbc = _georefDB.find(dbo);
                                if (dbc.size() >= 1) { // Problems!

                                    //Create list of candidates

                                    // NOTE(review): the anonymous TypeToken below is not closed here
                                    Type listType = new TypeToken<LinkedList<GeoFeaturePojo>>() {
                                    LinkedList<GeoFeaturePojo> grpl = new Gson()
                                            .fromJson(dbc.toArray().toString(), listType);

                                    if (_nDebugLevel >= 2) {
                                        System.out.println("\tERROR CANDIDATE: " + ent.getDisambiguatedName()
                                                + " VS " + ent.getActual_name() + " (" + dbc.count() + ")");

                                        if (_nDebugLevel >= 3) {
                                            for (GeoFeaturePojo grp : grpl) {
                                                System.out.println("\t\tCandidate:" + grp.getCity() + " / "
                                                        + grp.getRegion() + " / " + grp.getCountry());

                                    Candidate candidate = new Candidate(ent, grpl, stateOrCountry);
                                    dubiousLocations.put(ent.getIndex(), candidate);
                                    bStrongCandidate = true;

                                } // if strong candidate
                            } //TESTED ("reston, virginia" after "virginia/stateorcounty" mention)
                              // (end if can't shortcut past all this)

                        } // end if a US town
                    } // end if in the format "A, B"

                } // if weak candidate

                if (!bStrongCandidate) { // Obv can't count on a disambiguous candidate:               
                    String type = ent.getType().toLowerCase();

                    if (type.equals("stateorcounty")) {
                        String disName = ent.getDisambiguatedName().toLowerCase();
                        // NOTE(review): both branches below are empty in this listing; nothing
                        // visible ever adds to otherRegions - confirm against upstream.
                        if (_abbrStateRegex.matcher(disName).matches()) {
                        } else {
                        otherCountriesOrRegionsReferenced.add("united states");
                    } //TESTED: "mich./stateorcounty"
                    else if (type.equals("country")) {
                        String disName = ent.getDisambiguatedName().toLowerCase();

                        // Translation of known badly transcribed countries:
                        // (England->UK)
                        if (disName.equals("england")) {
                            otherCountries.add("united kingdom");
                        } //TESTED
                        // NOTE(review): the else body and the "region" branch body are empty here
                        else {
                    } else if (type.equals("region")) {
                    } else if (type.equals("city")) {
                        String splitMe[] = ent.getDisambiguatedName().split(",\\s*");
                        if (2 == splitMe.length) {
                            if (this._statesRegex.matcher(splitMe[1]).find()) {
                                otherCountriesOrRegionsReferenced.add("united states");
                            } //TESTED: "lexingon, kentucky/city"
                } //TESTED: just above clauses

            } // if location

        } // (end loop over entities)

    // Debug:
    if ((_nDebugLevel >= 3) && (!dubiousLocations.isEmpty())) {
        for (String s : otherRegions) {
            System.out.println("Strong region: " + s);
        for (String s : otherCountries) {
            System.out.println("Strong countries: " + s);
        for (String s : otherCountriesOrRegionsReferenced) {
            System.out.println("Weak regionscountries: " + s);

    // 2] The requirements and algorithm are discussed in 
    // http://ikanow.jira.com/wiki/display/INF/Beta...+improving+AlchemyAPI+extraction+%28geo%29
    // Canonical cases:
    // Darfur -> Darfur, MN even though Sudan and sometimes Darfur, Sudan are present
    // Shanghai -> Shanghai, WV even though China is mentioned (and not WV)
    // Manchester -> Manchester village, NY (not Manchester, UK)
    // Philadelphia -> Philadelphia (village), NY (though NY is mentioned and not PA) 

    // We're generating the following order
    //       10] Sitting tenant with strong direct
    //       15] Large city with strong direct      
    //       20] Region with direct
    //       30] Large city with strong indirect
    //       40] Sitting tenant with strong indirect 
    //       50] Region with indirect
    //       60] Another foreign possibility with strong direct 
    //       70] Large city with weak direct
    //       72] Large city with weak indirect
    //       75] Large city with no reference 
    //       78] Another foreign possibility with strong indirect (>100K population - ie not insignificant) 
    //       80] Sitting tenant with any weak (US) direct or indirect 
    //       90] Another foreign possibility with strong indirect 
    //      100] Another foreign possibility with weak direct 
    //      110] Another foreign possibility with weak indirect 
    //      120] Region with no reference, if there is only 1
    //      130] Sitting tenant with none of the above (ie default)
    //      140] Anything else!

    for (Map.Entry<String, Candidate> pair : dubiousLocations.entrySet()) {
        EntityPojo ent = pair.getValue().entity;
        Candidate candidate = pair.getValue();

        // 2.1] Let's analyse the "sitting tenant"

        // Lower nPrio is better; 130 is the sitting tenant's default score
        int nPrio = 130;
        GeoFeaturePojo currLeader = null;
        int nCase = 0; // (just for debugging, 0=st, 1=large city, 2=region, 3=other)

        if (otherRegions.contains(candidate.state)) { // Strong direct ref, winner!
            nPrio = 10; // winner!
        } //TESTED: "san antonio, texas/city" vs "texas"
        else if (otherCountriesOrRegionsReferenced.contains(candidate.state)) {
            // Indirect ref
            nPrio = 40; // good, but beatable...
        } //TESTED: "philadelphia (village), new york/city" 
        else if (otherCountries.contains("united states")) { // Weak direct ref
            nPrio = 80; // better than nothing...            
        } //TESTED: "apache, oklahoma/city"
        else if (otherCountriesOrRegionsReferenced.contains("united states")) { // Weak indirect ref
            nPrio = 80; // better than nothing...            
        } //TESTED: "washington, d.c." have DC as stateorcounty, but US in countries list

        // Special case: we don't like "village":
        // (this also overwrites a stronger score such as 10 or 40 with 80)
        if ((80 != nPrio) && ent.getDisambiguatedName().contains("village")
                && !ent.getActual_name().contains("village")) {
            nPrio = 80;
        } //TESTED: "Downvoted: Philadelphia (village), New York from Philadelphia"

        // Debug
        if (_nDebugLevel >= 2) {
            System.out.println(pair.getKey() + " SittingTenantScore=" + nPrio);

        // Alternatives
        if (nPrio > 10) {

            LinkedList<GeoFeaturePojo> geos = pair.getValue().candidates;
            for (GeoFeaturePojo geo : geos) {

                // 140 = "anything else": never beats the sitting tenant's default of 130
                int nAltPrio = 140;
                int nAltCase = -1;
                String city = (null != geo.getCity()) ? geo.getCity().toLowerCase() : null;
                String region = (null != geo.getRegion()) ? geo.getRegion().toLowerCase() : null;
                String country = (null != geo.getCountry()) ? geo.getCountry().toLowerCase() : null;

                // 2.2] CASE 1: I'm a large city (the check below uses population >= 400000; best score 15)
                //                15] Large city with strong direct      
                //                30] Large city with strong indirect
                //                70] Large city with weak direct
                //                72] Large city with weak indirect
                //                75] Large city with no reference                

                if ((null != city) && (geo.getPopulation() >= 400000) && (nPrio > 15)) {
                    nAltCase = 1;

                    if ((null != region) && (otherRegions.contains(region))) {
                        nAltPrio = 15; // strong direct
                    } //TESTED: "dallas / Texas / United States = 15"
                    else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                        nAltPrio = 30; // strong indirect
                    } //TESTED: "sacramento / California / United State"
                    else if ((null != country) && (otherCountries.contains(country))) {
                        nAltPrio = 70; // weak direct 
                    } //TESTED: "berlin, germany", with "germany" directly mentioned
                    else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                        nAltPrio = 72; // weak indirect 
                    } //TESTED: "los angeles / California / United States = 72"
                    else {
                        nAltPrio = 75; // just for being big!
                    } //TESTED: "barcelona, spain"

                // 2.3] CASE 2: I'm a region (best score=20, can beat current score)
                //                20] Region with direct
                //                50] Region with indirect
                //               120] Region with no reference, if there is only 1

                else if ((null == city) && (nPrio > 20)) {
                    nAltCase = 2;

                    if ((null != country) && (otherCountries.contains(country))) {
                        nAltPrio = 20; // strong direct 
                    } //TESTED: (region) "Berlin, Germany" with "Germany" mentioned
                    else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                        nAltPrio = 50; // strong indirect 
                    } //(haven't seen, but we'll live)
                    else {
                        nAltPrio = 120; // (just for being there)
                    } //TESTED: "null / Portland / Jamaica = 120", also "Shanghai / China"

                // 2.4] CASE 3: I'm any foreign possibility (best score=60)
                //                60] Another foreign possibility with strong direct 
                //                78] Another foreign possibility with strong indirect (>100K population - ie not insignificant) 
                //                90] Another foreign possibility with strong indirect 
                //               100] Another foreign possibility with weak direct 
                //               110] Another foreign possibility with weak indirect 

                else if (nPrio > 60) {
                    nAltCase = 3;

                    if ((null != region) && (otherRegions.contains(region))) {
                        nAltPrio = 60; // strong direct

                        // Double check we're not falling into the trap below:
                        if (!geo.getCountry_code().equals("US")) {
                            Matcher m = this._statesRegex.matcher(geo.getRegion());
                            if (m.matches()) { // non US state matching against (probably) US state, disregard)
                                nAltPrio = 140;
                        } //TESTED (same clause as below)

                    } //TESTED: lol "philadelphia / Maryland / Liberia = 60" (before above extra clause)

                    if (nAltPrio > 60) { // (may need to re-run test)
                        if ((null != country) && (otherCountries.contains(country))) {
                            if (geo.getPopulation() < 100000) {
                                nAltPrio = 90; // strong indirect
                            } //TESTED: "washington / Villa Clara / Cuba"
                            else {
                                nAltPrio = 78; // strong indirect, with boost!                        
                            } //TESTED: "geneva, Geneve, Switzerland", pop 180K
                        } else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                            nAltPrio = 100; // weak direct
                        } //TESTED: "lincoln / Lincolnshire / United Kingdom = 100"
                        else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                            nAltPrio = 110; // weak indirect
                        } //(haven't seen, but we'll live)                  
                // Debug:
                if ((_nDebugLevel >= 2) && (nAltPrio < 140)) {
                    System.out.println("----Alternative: " + geo.getCity() + " / " + geo.getRegion() + " / "
                            + geo.getCountry() + " score=" + nAltPrio);

                // Outcome of results:

                // Strictly lower score wins; ties keep the current leader / sitting tenant
                if (nAltPrio < nPrio) {
                    currLeader = geo;
                    nPrio = nAltPrio;
                    nCase = nAltCase;
            } // end loop over alternatives

            if (null != currLeader) { // Need to change

                if (1 == nCase) {

                    //(Cities are lower case in georef DB for some reason)
                    String city = WordUtils.capitalize(currLeader.getCity());

                    if (currLeader.getCountry_code().equals("US")) { // Special case: is this just the original?

                        String region = currLeader.getRegion();
                        if (region.equals("District of Columbia")) { // Special special case
                            region = "D.C.";
                        String sCandidate = city + ", " + region;

                        // NOTE(review): sCandidate is compared but never applied to the entity in
                        // this listing; a setDisambiguatedName call may have been lost - confirm.
                        if (!sCandidate.equals(ent.getDisambiguatedName())) {
                            ent.setIndex(ent.getDisambiguatedName() + "/city");
                            bChangedAnything = true;
                        } //TESTED (lots, eg "Philadelphia (village), New York" -> "Philadelphia, PA"; Wash, Ill. -> Wash DC)
                        else {
                        } //TESTED ("Washington DC", "San Juan, Puerto Rico")
                    } //TESTED (see above)
                    else {
                        ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
                        ent.setIndex(ent.getDisambiguatedName() + "/city");
                        bChangedAnything = true;
                    } //TESTED: "london, california/city to London, United Kingdom"
                } else if (2 == nCase) {
                    ent.setDisambiguatedName(currLeader.getRegion() + ", " + currLeader.getCountry());
                    ent.setIndex(ent.getDisambiguatedName() + "/region");
                    bChangedAnything = true;

                } //TESTED: "Moved madrid, new york/city to Madrid, Spain" (treats Madrid as region, like Berlin see above)
                else {
                    //(Cities are lower case in georef DB for some reason)
                    String city = WordUtils.capitalize(currLeader.getCity());

                    ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
                    ent.setIndex(ent.getDisambiguatedName() + "/city");
                    bChangedAnything = true;

                } //TESTED: "Moved geneva, new york/city to Geneva, Switzerland"

                if ((_nDebugLevel >= 1) && (null == ent.getSemanticLinks())) {
                    System.out.println("++++ Moved " + pair.getKey() + " to " + ent.getDisambiguatedName());
            } else {

        } // (if sitting tenant not holder)

    } // (end loop over candidates)      

    if ((_nDebugLevel >= 1) && bChangedAnything) {
        System.out.println("\t(((Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + doc.getUrl() + ")))");

    return bChangedAnything;

From source file:com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi.AlchemyEntityPersonCleanser.java

License:Open Source License

public void doProcessing(int nSkip, BasicDBObject userQuery, int nLimit, boolean bAlterDB)
        throws NumberFormatException, UnknownHostException, MongoException {

    // Initialization (regexes and stuff)

    // Launch MongoDB query

    BasicDBObject query = userQuery;/*from   w  w w.j a v a2s. c o m*/
    if (null == query) {
        new BasicDBObject();

    // Just get the entity list out to save a few CPU cycles
    BasicDBObject outFields = new BasicDBObject();
    outFields.append(DocumentPojo.entities_, 1);
    outFields.append(DocumentPojo.url_, 1); // (help with debugging) 
    outFields.append(DocumentPojo.title_, 1); // (help with debugging) 

    DBCursor dbc = null;
    if (nLimit > 0) {
        dbc = docsDB.find(query, outFields).limit(nLimit).skip(nSkip);
    } else { // Everything!
        dbc = docsDB.find(query, outFields).skip(nSkip);

    // Create POJO array of documents (definitely not the most efficient, but 
    // will make integration with the harvester easier)

    List<DocumentPojo> docus = DocumentPojo.listFromDb(dbc, DocumentPojo.listType());

    // Loop over array and invoke the cleansing function for each one

    for (DocumentPojo docu : docus) {
        if (this.cleansePeopleInDocu(docu)) {

            if (bAlterDB) {

                BasicDBObject inner0 = new BasicDBObject(DocumentPojo.entities_,
                        (DBObject) com.mongodb.util.JSON.parse(new Gson().toJson(docu.getEntities())));
                BasicDBObject inner1 = new BasicDBObject(MongoDbManager.set_, inner0);

                // Overwrite the existing entities list with the new one 
                docsDB.update(new BasicDBObject(DocumentPojo._id_, docu.getId()), inner1, false, true);
                // (need the multi-update in case _id isn't the shard key - documentation claims this is not necessary but 2.4.6/shell still enforces it)

            } //TESTED: checked on "Feed: Japan's Three Elections / 4c92863751cc2e59d612000b / 30"

From source file:com.ikanow.infinit.e.processing.custom.scheduler.CustomScheduleManager.java

License:Open Source License

/*
 * Look for jobs that have not started yet but are scheduled for some point in the future
 */
/**
 * Claims one custom job that is ready to run.
 *
 * <p>Builds a query for jobs whose {@code jobidS_} field is null and whose
 * {@code waitingOn_} list is empty, then uses {@code findAndModify} to set
 * {@code jobidS_} to {@code ""} and {@code lastRunTime} to the current date on the
 * matched job.
 *
 * <p>NOTE(review): parts of this listing are truncated (see inline notes); confirm the
 * missing pieces against the upstream file.
 *
 * @param prop_custom    passed to {@code availableSlots} to decide whether another job may start
 * @param bLocalMode     together with {@code bHadoopEnabled}: when both are false, only jobs
 *                       with a null {@code jarURL} are matched
 * @param bHadoopEnabled see {@code bLocalMode}
 * @return the claimed job, or {@code null} if no slot is available, nothing matched, or
 *         an exception occurred
 */
public static CustomMapReduceJobPojo getJobsToRun(PropertiesManager prop_custom, boolean bLocalMode,
        boolean bHadoopEnabled) {
    try {
        // First off, check the number of running jobs - don't exceed the max
        // (seem to run into memory problems if this isn't limited?)
        if (!availableSlots(prop_custom)) {
            return null;

        BasicDBObject query = new BasicDBObject();
        // Not yet started: no job id assigned
        query.append(CustomMapReduceJobPojo.jobidS_, null);
        // No outstanding dependencies
        query.append(CustomMapReduceJobPojo.waitingOn_, new BasicDBObject(MongoDbManager.size_, 0));
        // NOTE(review): the start of the next statement is missing; it looks like a
        // "less than now" constraint on some time field - confirm which field.
                new BasicDBObject(MongoDbManager.lt_, new Date().getTime()));
        if (!bHadoopEnabled && !bLocalMode) {
            // Can only get shared queries:
            query.append("jarURL", null);
        // Mark the job as claimed (empty job id) and stamp the run time
        BasicDBObject updates = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, "");
        updates.append("lastRunTime", new Date());
        BasicDBObject update = new BasicDBObject(MongoDbManager.set_, updates);
        // NOTE(review): the argument list below is truncated in this listing
        DBObject dbo = DbManager.getCustom().getLookup().findAndModify(query, null, null, false, update, true,

        if (dbo != null) {
            return CustomMapReduceJobPojo.fromDb(dbo, CustomMapReduceJobPojo.class);
    } catch (Exception ex) {
        // Exception is swallowed; the method falls through and returns null
        //oh noes!

    return null;

From source file:com.ikanow.infinit.e.processing.custom.scheduler.CustomScheduleManager.java

License:Open Source License

/*
 * Look for running jobs, decide if they are complete
 */
public static CustomMapReduceJobPojo getJobsToMakeComplete(boolean bHadoopEnabled,
        Map<ObjectId, String> incompleteJobsMap) {
    // Finds one job whose jobidS is none of null, "CHECKING_COMPLETION" or "", sets its jobidS to
    // "CHECKING_COMPLETION" and its lastChecked to the current time, records it in incompleteJobsMap
    // (keyed by _id) and returns it. Returns null when nothing matches or when any exception is thrown.
    // NOTE(review): this excerpt looks truncated - the closing braces of the if/try/catch blocks and of
    // the method are missing; restore them from the original file before compiling.
    try {
        BasicDBObject query = new BasicDBObject();
        // The three jobidS values that the $nor clause excludes from the match
        BasicDBObject nors[] = new BasicDBObject[3];
        nors[0] = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, null);
        nors[1] = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, "CHECKING_COMPLETION");
        nors[2] = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, "");
        query.put(MongoDbManager.nor_, Arrays.asList(nors));
        // Once jobidS is "CHECKING_COMPLETION" the document no longer matches the $nor query above
        BasicDBObject updates = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, "CHECKING_COMPLETION");
        updates.put(CustomMapReduceJobPojo.lastChecked_, new Date());
        BasicDBObject update = new BasicDBObject(MongoDbManager.set_, updates);
        if (!bHadoopEnabled) {
            // Can only get shared queries:
            query.append(CustomMapReduceJobPojo.jarURL_, null);
        // NOTE(review): the two-argument findAndModify in the legacy driver presumably returns the
        // document as it was BEFORE the update, so cmr.jobidS below would be the job's previous id
        // rather than "CHECKING_COMPLETION" - confirm against the driver documentation
        DBObject dbo = DbManager.getCustom().getLookup().findAndModify(query, update);

        if (dbo != null) {
            CustomMapReduceJobPojo cmr = CustomMapReduceJobPojo.fromDb(dbo, CustomMapReduceJobPojo.class);
            // Remember the job's id string, keyed by its _id
            incompleteJobsMap.put(cmr._id, cmr.jobidS);
            return cmr;
    } catch (Exception ex) {
        //oh noes!
        // (the exception is not logged or rethrown in the visible code; control falls through to return null)

    return null;

From source file:com.ikanow.infinit.e.processing.custom.status.CustomStatusManager.java

License:Open Source License

/**
 * Sets the custom mr pojo to be complete for the
 * current job.  Currently this is done by removing the
 * jobid and updating the next runtime; it also increments the
 * timesRan counter so we can calculate nextRunTime.
 *
 * Also sets lastCompletion time to now (best we can approx)
 * @param cmr
 */
public void setJobComplete(CustomMapReduceJobPojo cmr, boolean isComplete, boolean isError, float mapProgress,
        float reduceProgress, String errorMessage) {
    // Writes the job's status back to the custom lookup collection: map/reduce progress in the visible
    // code path, and (when isComplete) the reset job ids, completion time, run counters and the
    // status/error message. When the job is derived from a source, the source's harvest status is
    // updated as well.
    // NOTE(review): this excerpt looks truncated - many closing braces, the tails of several multi-line
    // calls and some loop/try bodies are missing; restore them from the original file before compiling.

    // First off, if complete then run custom internal engine finish routines:
    // (only when a mapper is set, non-empty and not "none")
    if ((null != cmr.mapper) && !cmr.mapper.isEmpty() && !cmr.mapper.equalsIgnoreCase("none")) {
        StringBuffer postTaskActivityErrors = new StringBuffer();
        // errLen is the buffer length before the post-task call, so any growth means new errors were added
        int errLen = 0;
        if (null != errorMessage) {
            postTaskActivityErrors = new StringBuffer(errorMessage);
            errLen = postTaskActivityErrors.length();
        InfiniteHadoopUtils.handlePostTaskActivities(cmr, isError, postTaskActivityErrors);
        if (postTaskActivityErrors.length() > errLen) {
            errorMessage = postTaskActivityErrors.toString();
    } //TESTED

    // (Note, inc_ and unset_ are added in one place each, so can't use them without ensuring you combine existing uses)
    // "updates" collects the fields placed under set_; "update" is the top-level update document that
    // receives the inc_, unset_ and set_ entries
    BasicDBObject updates = new BasicDBObject();
    BasicDBObject update = new BasicDBObject();
    try {
        long nNew = 0;
        long nTotal = 0;
        if (isComplete) {
            // runtime: ms elapsed since lastRunTime; timeFromSchedule: ms between lastRunTime and the
            // stored nextRunTime
            long runtime = new Date().getTime() - cmr.lastRunTime.getTime();
            long timeFromSchedule = cmr.lastRunTime.getTime() - cmr.nextRunTime;

            // Reset the job ids
            updates.append(CustomMapReduceJobPojo.jobidS_, null);
            updates.append(CustomMapReduceJobPojo.jobidN_, 0);
            try {
                //if next run time reschedules to run before now, keep rescheduling until its later
                //the server could have been turned off for days and would try to rerun all jobs once a day
                // NOTE(review): the remaining arguments of this call are missing from the excerpt
                long nextRunTime = CustomScheduleManager.getNextRunTime(cmr.scheduleFreq, cmr.firstSchedule,
                updates.append(CustomMapReduceJobPojo.nextRunTime_, nextRunTime);
            } catch (Exception e) {
            } // just carry on, we'll live...

            updates.append(CustomMapReduceJobPojo.lastCompletionTime_, new Date());
            updates.append(CustomMapReduceJobPojo.tempConfigXMLLocation_, null);
            updates.append(CustomMapReduceJobPojo.tempJarLocation_, null);
            // NOTE(review): the try body is empty in this excerpt; judging by the log key below it
            // presumably removed the temp files - confirm against the original file
            try {
            } catch (Exception e) {
                _logger.info("job_error_removing_tempfiles=" + InfiniteHadoopUtils.createExceptionMessage(e));

            // Increments: timesRan always, timesFailed additionally on error (see below)
            BasicDBObject incs = new BasicDBObject(CustomMapReduceJobPojo.timesRan_, 1);
            //copy dependencies to waitingOn
            updates.append(CustomMapReduceJobPojo.waitingOn_, cmr.jobDependencies);
            if (!isError) {
                // Counts and move and output
                nNew = DbManager.getCollection(cmr.getOutputDatabase(), cmr.outputCollectionTemp).count();

                //TODO (INF-1159): this shouldn't really be here but it makes life much easier for now (really should be part of the m/r OutputFormat...)
                CustomOutputManager.completeOutput(cmr, prop_custom);

                //if job was successfully, mark off dependencies
                // NOTE(review): no statement follows the comment above in this excerpt; the call it
                // describes appears to have been dropped

                // More counts:
                nTotal = DbManager.getCollection(cmr.getOutputDatabase(), cmr.outputCollection).count();

                // Status:
                String completionStatus = "Schedule Delta: " + timeFromSchedule + "ms\nCompletion Time: "
                        + runtime + "ms\nNew Records: " + nNew + "\nTotal Records: " + nTotal;
                if (null == errorMessage) { // (I think will always be the case?)
                    errorMessage = completionStatus;
                } else {
                    errorMessage += "\n" + completionStatus;
                if ((null != cmr.tempErrors) && !cmr.tempErrors.isEmpty()) { // Individual errors reported from map/combine/reduce
                    StringBuffer sb = new StringBuffer(errorMessage).append("\n\nLog Messages:\n\n");
                    // NOTE(review): the loop body is missing from this excerpt
                    for (String err : cmr.tempErrors) {
                    errorMessage = sb.toString();
                    // tempErrors is unset in the stored document once it has been folded into the message
                    update.put(MongoDbManager.unset_, new BasicDBObject(CustomMapReduceJobPojo.tempErrors_, 1));
                updates.append(CustomMapReduceJobPojo.errorMessage_, errorMessage); // (will often be null)
            } else {
                if ((null != cmr.tempErrors) && !cmr.tempErrors.isEmpty()) { // Individual errors reported from map/combine/reduce
                    // NOTE(review): new StringBuffer(String) throws NullPointerException for a null
                    // argument, and unlike the success branch nothing visible here guarantees that
                    // errorMessage is non-null - confirm callers always pass a message when isError is true
                    StringBuffer sb = new StringBuffer(errorMessage).append("\n\nLog Messages:\n\n");
                    // NOTE(review): the loop body is missing from this excerpt
                    for (String err : cmr.tempErrors) {
                    errorMessage = sb.toString();
                    update.put(MongoDbManager.unset_, new BasicDBObject(CustomMapReduceJobPojo.tempErrors_, 1));
                //failed, just append error message
                updates.append(CustomMapReduceJobPojo.errorMessage_, errorMessage);
                incs.append(CustomMapReduceJobPojo.timesFailed_, 1);
                cmr.timesFailed++; // (so that in memory processes can tell if a job failed)
            update.append(MongoDbManager.inc_, incs);

            // Completion log line; the job_hadoop_id field is only included when jobidS is non-null
            if (null != cmr.jobidS) {
                _logger.info("job_completion_title=" + cmr.jobtitle + " job_completion_id=" + cmr._id.toString()
                        + " job_completion_time=" + runtime + " job_schedule_delta=" + timeFromSchedule
                        + " job_completion_success=" + !isError + " job_hadoop_id=" + cmr.jobidS + "_"
                        + cmr.jobidN + " job_new_records=" + nNew + " job_total_records=" + nTotal);
            } else {
                _logger.info("job_completion_title=" + cmr.jobtitle + " job_completion_id=" + cmr._id.toString()
                        + " job_completion_time=" + runtime + " job_schedule_delta=" + timeFromSchedule
                        + " job_completion_success=" + !isError + " job_new_records=" + nNew
                        + " job_total_records=" + nTotal);
        // Progress values are added to the set_ fields
        // (judging by indentation this sits outside the isComplete block - confirm once braces are restored)
        updates.append(CustomMapReduceJobPojo.mapProgress_, mapProgress);
        updates.append(CustomMapReduceJobPojo.reduceProgress_, reduceProgress);
    } catch (Exception ex) {
        _logger.info("job_error_updating_status_title=" + cmr.jobtitle + " job_error_updating_status_id="
                + cmr._id.toString() + " job_error_updating_status_message="
                + InfiniteHadoopUtils.createExceptionMessage(ex));
    } finally { // It's really bad if this doesn't happen, so do it here so that it always gets called
        if (!updates.isEmpty()) {
            update.append(MongoDbManager.set_, updates);
            // (if isComplete, should always include resetting jobidS and jobidN)
            // NOTE(review): the remaining arguments of this update call are missing from the excerpt
            DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, cmr._id),

            // (also set local version)
            cmr.errorMessage = errorMessage;
        if (isComplete || isError) {
            // If we're derived from a source then update the source:
            if (null != cmr.derivedFromSourceKey) {

                // For a source's first run, need to grab the entire source to check if we need to override the tmin/tmax
                SourcePojo srcJustRun = null;

                // First successful completion only (timesRan is still 0)
                if ((isComplete && !isError) && (0 == cmr.timesRan)) {
                    BasicDBObject srcQuery = new BasicDBObject(SourcePojo.key_, cmr.derivedFromSourceKey);
                    // NOTE(review): the remaining arguments of this fromDb call are missing from the excerpt
                    srcJustRun = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(srcQuery),
                    // NOTE(review): srcJustRun is dereferenced here before the null check a few lines
                    // below; if fromDb can return null this would throw - confirm
                    if (null == srcJustRun.getHarvestStatus()) { // (don't allow initial override, if one is set)
                        srcJustRun.setHarvestStatus(new SourceHarvestStatusPojo());

                    if (null != srcJustRun) {
                        try {
                            LinkedList<CustomMapReduceJobPojo> updatedJobs = new LinkedList<CustomMapReduceJobPojo>();
                            // NOTE(review): the remaining arguments of this call and the body of the
                            // loop/if below are missing from the excerpt
                            SourcePipelineToCustomConversion.convertSourcePipeline(srcJustRun, updatedJobs,
                            for (CustomMapReduceJobPojo cmrUpdate : updatedJobs) {
                                if (cmrUpdate._id.equals(cmr._id)) {
                        } catch (Exception e) {
                        } // just carry on
                } //TESTED (by hand)

                // Set the source's harvest status to error or success, plus the message when one is set
                BasicDBObject query = new BasicDBObject(SourcePojo.key_, cmr.derivedFromSourceKey);
                BasicDBObject setUpdate = new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_harvest_status_,
                        isError ? HarvestEnum.error.toString() : HarvestEnum.success.toString());
                if (null != cmr.errorMessage) {
                    setUpdate.put(SourceHarvestStatusPojo.sourceQuery_harvest_message_, cmr.errorMessage);
                BasicDBObject srcUpdate = new BasicDBObject(DbManager.set_, setUpdate);
                DbManager.getIngest().getSource().update(query, srcUpdate, false, false);
        } //TESTED (by hand)

From source file:com.ikanow.infinit.e.processing.custom.status.CustomStatusManager.java

License:Open Source License

/**
 * Updates the status of the current, active, job
 */
public void updateJobPojo(ObjectId _id, String jobids, int jobidn, String xmlLocation, String jarLocation,
        CustomMapReduceJobPojo job) {
    try {
        BasicDBObject set = new BasicDBObject();
        set.append(CustomMapReduceJobPojo.jobidS_, jobids);
        set.append(CustomMapReduceJobPojo.jobidN_, jobidn);
        set.append(CustomMapReduceJobPojo.tempConfigXMLLocation_, xmlLocation);
        set.append(CustomMapReduceJobPojo.tempJarLocation_, jarLocation);
        set.append(CustomMapReduceJobPojo.errorMessage_, null);
        BasicDBObject updateObject = new BasicDBObject(MongoDbManager.set_, set);
        DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, _id),

        if ((null != job) && (null != job.derivedFromSourceKey)) {
            //update to success_iteration
            BasicDBObject query = new BasicDBObject(SourcePojo.key_, job.derivedFromSourceKey);
            BasicDBObject setUpdate = new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_harvest_status_,
            BasicDBObject srcUpdate = new BasicDBObject(DbManager.set_, setUpdate);
            DbManager.getIngest().getSource().update(query, srcUpdate, false, false);
    } catch (Exception ex) {