Example usage for com.mongodb DBCursor toArray

List of usage examples for com.mongodb DBCursor toArray

Introduction

On this page you can find example usages of com.mongodb.DBCursor.toArray().

Prototype

public List<DBObject> toArray() 

Source Link

Document

Converts this cursor to a list of its results (despite the method name, the return type is List<DBObject> rather than an array).

Usage

From source file:com.ikanow.infinit.e.api.config.source.SourceHandler.java

License:Open Source License

/**
 * getPendingSources/*from   w ww.j a v  a2 s . c  o  m*/
 * Get a list of sources pending approval for a list of one or more
 * community IDs passed via the communityid parameter
 * @param communityIdStrList
 * @return
 */
public ResponsePojo getPendingSources(String userIdStr, String communityIdStrList, boolean bStrip) {
    ResponsePojo rp = new ResponsePojo();
    try {
        String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);
        ObjectId userId = null;
        boolean bAdmin = RESTTools.adminLookup(userIdStr);
        if (!bAdmin) {
            userId = new ObjectId(userIdStr); // (ie not admin, may not see 
        }
        Set<ObjectId> communityIdSet = new TreeSet<ObjectId>();
        Set<ObjectId> ownedOrModeratedCommunityIdSet = new TreeSet<ObjectId>();
        for (String s : communityIdStrs) {
            ObjectId communityId = new ObjectId(s);
            communityIdSet.add(communityId);
            if (null != userId) {
                if (isOwnerOrModerator(communityId.toString(), userIdStr)) {
                    ownedOrModeratedCommunityIdSet.add(communityId);
                }
            }
        }
        //TESTED (owner and community owner, public and not public) 

        // Set up the query
        BasicDBObject query = new BasicDBObject();
        query.put(SourcePojo.isApproved_, false);
        query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, communityIdSet));
        BasicDBObject fields = new BasicDBObject();
        if (bStrip) {
            setStrippedFields(fields);
        }
        DBCursor dbc = DbManager.getIngest().getSource().find(query, fields);

        // Remove communityids we don't want the user to see:
        if (bStrip && sanityCheckStrippedSources(dbc.toArray(), bAdmin)) {
            rp.setData(dbc.toArray(), (BasePojoApiMap<DBObject>) null);
        } else {
            rp.setData(SourcePojo.listFromDb(dbc, SourcePojo.listType()),
                    new SourcePojoApiMap(userId, communityIdSet, ownedOrModeratedCommunityIdSet));
        }
        rp.setResponse(new ResponseObject("Pending Sources", true, "successfully returned pending sources"));
    } catch (Exception e) {
        // If an exception occurs log the error
        logger.error("Exception Message: " + e.getMessage(), e);
        rp.setResponse(new ResponseObject("Pending Sources", false, "error returning pending sources"));
    }
    return rp;
}

From source file:com.ikanow.infinit.e.api.config.source.SourceHandler.java

License:Open Source License

/**
 * getUserSources/*from  w ww .  ja va2  s .com*/
 * @param userIdStr
 * @param userId
 * @return
 */
public ResponsePojo getUserSources(String userIdStr, boolean bStrip) {
    ResponsePojo rp = new ResponsePojo();
    try {
        boolean bAdmin = RESTTools.adminLookup(userIdStr);
        HashSet<ObjectId> userCommunities = SocialUtils.getUserCommunities(userIdStr);

        DBCursor dbc = null;
        BasicDBObject query = new BasicDBObject();
        query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, userCommunities));
        BasicDBObject fields = new BasicDBObject();
        if (bStrip) {
            setStrippedFields(fields);
        }

        Set<ObjectId> ownedOrModeratedCommunityIdSet = null;
        if (!bAdmin) {
            ownedOrModeratedCommunityIdSet = new TreeSet<ObjectId>();
            for (ObjectId communityId : userCommunities) {
                if (isOwnerOrModerator(communityId.toString(), userIdStr)) {
                    ownedOrModeratedCommunityIdSet.add(communityId);
                }
            }
        }
        // Get all sources for admins
        if (bAdmin) {
            dbc = DbManager.getIngest().getSource().find(query, fields);
        }
        // Get only sources the user owns or owns/moderates the parent community
        else {
            query.put(SourcePojo.ownerId_, new ObjectId(userIdStr));
            BasicDBObject query2 = new BasicDBObject();
            query2.put(SourcePojo.communityIds_,
                    new BasicDBObject(MongoDbManager.in_, ownedOrModeratedCommunityIdSet));
            dbc = DbManager.getIngest().getSource()
                    .find(new BasicDBObject(MongoDbManager.or_, Arrays.asList(query, query2)), fields);
        }
        if (bStrip && sanityCheckStrippedSources(dbc.toArray(), bAdmin)) {
            rp.setData(dbc.toArray(), (BasePojoApiMap<DBObject>) null);
        } else {
            rp.setData(SourcePojo.listFromDb(dbc, SourcePojo.listType()),
                    new SourcePojoApiMap(null, userCommunities, null));
        }
        rp.setResponse(new ResponseObject("User's Sources", true, "successfully returned user's sources"));
    } catch (Exception e) {
        // If an exception occurs log the error
        logger.error("Exception Message: " + e.getMessage(), e);
        rp.setResponse(new ResponseObject("User's Sources", false, "error returning user's sources"));
    }
    return rp;
}

From source file:com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi.AlchemyEntityGeoCleanser.java

License:Open Source License

/**
 * Re-checks the geographic disambiguation of the location entities in a document.
 *
 * Pass 1 walks the document's entities with dimension "Where". An entity whose
 * disambiguated name differs from its actual name, has the form "A, B", and whose "B"
 * matches the US states regex is looked up in the georef DB by actual name (non-US
 * entries or entries with population >= 400000); if anything comes back the entity
 * becomes a "dubious" candidate. All other location entities contribute context:
 * directly mentioned regions, directly mentioned countries, and indirectly referenced
 * regions/countries.
 *
 * Pass 2 scores each dubious entity's current disambiguation (the "sitting tenant")
 * against every alternative from the georef DB (lower score wins) and, if an
 * alternative wins, rewrites the entity's disambiguated name and index and clears its
 * semantic links. The instance counters (_nMovedToLargeCity, _nMovedToRegion,
 * _nMovedToForeignCity, _nStayedWithOriginal) are updated along the way.
 *
 * @param doc the document whose entities are inspected and possibly modified in place
 * @return true if at least one entity was re-disambiguated
 */
public boolean cleanseGeoInDocu(DocumentPojo doc) {

    boolean bChangedAnything = false;

    Map<String, Candidate> dubiousLocations = new HashMap<String, Candidate>();

    Set<String> otherRegions = new HashSet<String>();
    Set<String> otherCountries = new HashSet<String>();
    Set<String> otherCountriesOrRegionsReferenced = new HashSet<String>();

    //Debug
    if (_nDebugLevel >= 2) {
        // (the entity list may be null - see the guard on the loop below - so don't dereference it blindly)
        int nEntities = (null != doc.getEntities()) ? doc.getEntities().size() : 0;
        System.out.println("+++++++ Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + nEntities);
    }

    // 1] First off, let's find anything location-based and also determine if it's bad or not

    if (null != doc.getEntities())
        for (EntityPojo ent : doc.getEntities()) {

            boolean bStrongCandidate = false;

            // Locations only
            if (EntityPojo.Dimension.Where == ent.getDimension()) {

                // So locations get disambiguated to one of:
                // "<city-etc>, <region-or-country>", or "<region-or-country>"
                // though can also just be left as they are.

                String sActualName = ent.getActual_name().toLowerCase();
                if (!ent.getDisambiguatedName().toLowerCase().equals(sActualName)) {
                    // It's been disambiguated

                    //Debug
                    if (_nDebugLevel >= 3) {
                        System.out.println("disambiguous candidate: " + ent.getDisambiguatedName() + " VS "
                                + ent.getActual_name() + " ("
                                + ((null != ent.getSemanticLinks()) ? ent.getSemanticLinks().size() : 0) + ")");
                    }

                    // OK next step, is it a disambiguation to a US town?
                    String splitMe[] = ent.getDisambiguatedName().split(", ");
                    if (2 == splitMe.length) {

                        String stateOrCountry = splitMe[1];
                        Matcher m = _statesRegex.matcher(stateOrCountry);
                        if (m.find()) { // This is a US disambiguation - high risk case
                            // Short cut if state is already directly mentioned?
                            stateOrCountry = stateOrCountry.toLowerCase();

                            if (!otherRegions.contains(stateOrCountry)) { // See list below - no need to go any further

                                // OK next step - is it a possible ambiguity:
                                // (same search_field, and either not in the US or population >= 400000)
                                ArrayList<BasicDBObject> x = new ArrayList<BasicDBObject>();
                                BasicDBObject inner0_0 = new BasicDBObject(MongoDbManager.not_,
                                        Pattern.compile("US"));
                                BasicDBObject inner1_0 = new BasicDBObject("country_code", inner0_0);
                                x.add(inner1_0);

                                BasicDBObject inner0_1 = new BasicDBObject(MongoDbManager.gte_, 400000);
                                BasicDBObject inner1_1 = new BasicDBObject("population", inner0_1);
                                x.add(inner1_1);

                                BasicDBObject dbo = new BasicDBObject();
                                dbo.append("search_field", sActualName);
                                dbo.append(MongoDbManager.or_, x);

                                DBCursor dbc = _georefDB.find(dbo);
                                if (dbc.size() >= 1) { // Problems!

                                    //Create list of candidates

                                    Type listType = new TypeToken<LinkedList<GeoFeaturePojo>>() {
                                    }.getType();
                                    LinkedList<GeoFeaturePojo> grpl = new Gson()
                                            .fromJson(dbc.toArray().toString(), listType);

                                    //Debug
                                    if (_nDebugLevel >= 2) {
                                        System.out.println("\tERROR CANDIDATE: " + ent.getDisambiguatedName()
                                                + " VS " + ent.getActual_name() + " (" + dbc.count() + ")");

                                        if (_nDebugLevel >= 3) {
                                            for (GeoFeaturePojo grp : grpl) {
                                                System.out.println("\t\tCandidate:" + grp.getCity() + " / "
                                                        + grp.getRegion() + " / " + grp.getCountry());
                                            }
                                        }
                                    }

                                    Candidate candidate = new Candidate(ent, grpl, stateOrCountry);
                                    dubiousLocations.put(ent.getIndex(), candidate);
                                    bStrongCandidate = true;

                                } // if strong candidate
                            } //TESTED ("reston, virginia" after "virginia/stateorcounty" mention)
                              // (end if can't shortcut past all this)

                        } // end if a US town
                    } // end if in the format "A, B"

                } // if weak candidate
                  //TESTED

                if (!bStrongCandidate) { // Obv can't count on a disambiguous candidate:
                    String type = ent.getType().toLowerCase();

                    if (type.equals("stateorcounty")) {
                        String disName = ent.getDisambiguatedName().toLowerCase();
                        if (_abbrStateRegex.matcher(disName).matches()) {
                            otherRegions.add(getStateFromAbbr(disName));
                        } else {
                            otherRegions.add(ent.getDisambiguatedName().toLowerCase());
                        }
                        otherCountriesOrRegionsReferenced.add("united states");
                    } //TESTED: "mich./stateorcounty"
                    else if (type.equals("country")) {
                        String disName = ent.getDisambiguatedName().toLowerCase();

                        // Translation of known badly transcribed countries:
                        // (England->UK)
                        if (disName.equals("england")) {
                            otherCountries.add("united kingdom");
                        } //TESTED
                        else {
                            otherCountries.add(ent.getDisambiguatedName().toLowerCase());
                        }
                    } else if (type.equals("region")) {
                        otherRegions.add(ent.getDisambiguatedName().toLowerCase());
                    } else if (type.equals("city")) {
                        String splitMe[] = ent.getDisambiguatedName().split(",\\s*");
                        if (2 == splitMe.length) {
                            otherCountriesOrRegionsReferenced.add(splitMe[1].toLowerCase());
                            if (this._statesRegex.matcher(splitMe[1]).find()) {
                                otherCountriesOrRegionsReferenced.add("united states");
                            } //TESTED: "lexingon, kentucky/city"
                        }
                    }
                } //TESTED: just above clauses

            } // if location

        } // (end loop over entities)

    // Debug:
    if ((_nDebugLevel >= 3) && (!dubiousLocations.isEmpty())) {
        for (String s : otherRegions) {
            System.out.println("Strong region: " + s);
        }
        for (String s : otherCountries) {
            System.out.println("Strong countries: " + s);
        }
        for (String s : otherCountriesOrRegionsReferenced) {
            System.out.println("Weak regionscountries: " + s);
        }
    }

    // 2] The requirements and algorithm are discussed in
    // http://ikanow.jira.com/wiki/display/INF/Beta...+improving+AlchemyAPI+extraction+%28geo%29
    // Canonical cases:
    // Darfur -> Darfur, MN even though Sudan and sometimes Darfur, Sudan are present
    // Shanghai -> Shanghai, WV even though China is mentioned (and not WV)
    // Manchester -> Manchester village, NY (not Manchester, UK)
    // Philadelphia -> Philadelphia (village), NY (though NY is mentioned and not PA)

    // We're generating the following order
    //       10] Sitting tenant with strong direct
    //       15] Large city with strong direct
    //       20] Region with direct
    //       30] Large city with strong indirect
    //       40] Sitting tenant with strong indirect
    //       50] Region with indirect
    //       60] Another foreign possibility with strong direct
    //       70] Large city with weak direct
    //       72] Large city with weak indirect
    //       75] Large city with no reference
    //       78] Another foreign possibility with strong indirect (>100K population - ie not insignificant)
    //       80] Sitting tenant with any weak (US) direct or indirect
    //       90] Another foreign possibility with strong indirect
    //      100] Another foreign possibility with weak direct
    //      110] Another foreign possibility with weak indirect
    //      120] Region with no reference, if there is only 1
    //      130] Sitting tenant with none of the above (ie default)
    //      140] Anything else!

    for (Map.Entry<String, Candidate> pair : dubiousLocations.entrySet()) {
        EntityPojo ent = pair.getValue().entity;
        Candidate candidate = pair.getValue();

        // 2.1] Let's analyse the "sitting tenant"

        int nPrio = 130;
        GeoFeaturePojo currLeader = null;
        int nCase = 0; // (just for debugging, 0=st, 1=large city, 2=region, 3=other)

        if (otherRegions.contains(candidate.state)) { // Strong direct ref, winner!
            nPrio = 10; // winner!
        } //TESTED: "san antonio, texas/city" vs "texas"
        else if (otherCountriesOrRegionsReferenced.contains(candidate.state)) {
            // Indirect ref
            nPrio = 40; // good, but beatable...
        } //TESTED: "philadelphia (village), new york/city"
        else if (otherCountries.contains("united states")) { // Weak direct ref
            nPrio = 80; // better than nothing...
        } //TESTED: "apache, oklahoma/city"
        else if (otherCountriesOrRegionsReferenced.contains("united states")) { // Weak indirect ref
            nPrio = 80; // better than nothing...
        } //TESTED: "washington, d.c." have DC as stateorcounty, but US in countries list

        // Special case: we don't like "village":
        if ((80 != nPrio) && ent.getDisambiguatedName().contains("village")
                && !ent.getActual_name().contains("village")) {
            nPrio = 80;
        } //TESTED: "Downvoted: Philadelphia (village), New York from Philadelphia"

        // Debug
        if (_nDebugLevel >= 2) {
            System.out.println(pair.getKey() + " SittingTenantScore=" + nPrio);
        }

        // Alternatives
        if (nPrio > 10) {

            LinkedList<GeoFeaturePojo> geos = pair.getValue().candidates;
            for (GeoFeaturePojo geo : geos) {

                int nAltPrio = 140;
                int nAltCase = -1;
                String city = (null != geo.getCity()) ? geo.getCity().toLowerCase() : null;
                String region = (null != geo.getRegion()) ? geo.getRegion().toLowerCase() : null;
                String country = (null != geo.getCountry()) ? geo.getCountry().toLowerCase() : null;

                // 2.2] CASE 1: I'm a city with pop >= 400K (best score 15)
                //                15] Large city with strong direct
                //                30] Large city with strong indirect
                //                70] Large city with weak direct
                //                72] Large city with weak indirect
                //                75] Large city with no reference

                if ((null != city) && (geo.getPopulation() >= 400000) && (nPrio > 15)) {
                    nAltCase = 1;

                    if ((null != region) && (otherRegions.contains(region))) {
                        nAltPrio = 15; // strong direct
                    } //TESTED: "dallas / Texas / United States = 15"
                    else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                        nAltPrio = 30; // strong indirect
                    } //TESTED: "sacramento / California / United State"
                    else if ((null != country) && (otherCountries.contains(country))) {
                        nAltPrio = 70; // weak direct
                    } //TESTED: "berlin, germany", with "germany" directly mentioned
                    else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                        nAltPrio = 72; // weak indirect
                    } //TESTED: "los angeles / California / United States = 72"
                    else {
                        nAltPrio = 75; // just for being big!
                    } //TESTED: "barcelona, spain"
                }

                // 2.3] CASE 2: I'm a region (best score=20, can beat current score)
                //                20] Region with direct
                //                50] Region with indirect
                //               120] Region with no reference, if there is only 1

                else if ((null == city) && (nPrio > 20)) {
                    nAltCase = 2;

                    if ((null != country) && (otherCountries.contains(country))) {
                        nAltPrio = 20; // strong direct
                    } //TESTED: (region) "Berlin, Germany" with "Germany" mentioned
                    else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                        nAltPrio = 50; // strong indirect
                    } //(haven't seen, but we'll live)
                    else {
                        nAltPrio = 120; // (just for being there)
                    } //TESTED: "null / Portland / Jamaica = 120", also "Shanghai / China"
                }

                // 2.4] CASE 3: I'm any foreign possibility (best score=60)
                //                60] Another foreign possibility with strong direct
                //                78] Another foreign possibility with strong indirect (>100K population - ie not insignificant)
                //                90] Another foreign possibility with strong indirect
                //               100] Another foreign possibility with weak direct
                //               110] Another foreign possibility with weak indirect

                else if (nPrio > 60) {
                    nAltCase = 3;

                    if ((null != region) && (otherRegions.contains(region))) {
                        nAltPrio = 60; // strong direct

                        // Double check we're not falling into the trap below:
                        if (!geo.getCountry_code().equals("US")) {
                            Matcher m = this._statesRegex.matcher(geo.getRegion());
                            if (m.matches()) { // non US state matching against (probably) US state, disregard)
                                nAltPrio = 140;
                            }
                        } //TESTED (same clause as below)

                    } //TESTED: lol "philadelphia / Maryland / Liberia = 60" (before above extra clause)

                    if (nAltPrio > 60) { // (may need to re-run test)
                        if ((null != country) && (otherCountries.contains(country))) {
                            if (geo.getPopulation() < 100000) {
                                nAltPrio = 90; // strong indirect
                            } //TESTED: "washington / Villa Clara / Cuba"
                            else {
                                nAltPrio = 78; // strong indirect, with boost!
                            } //TESTED: "geneva, Geneve, Switzerland", pop 180K
                        } else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                            nAltPrio = 100; // weak direct
                        } //TESTED: "lincoln / Lincolnshire / United Kingdom = 100"
                        else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                            nAltPrio = 110; // weak indirect
                        } //(haven't seen, but we'll live)
                    }
                }
                // Debug:
                if ((_nDebugLevel >= 2) && (nAltPrio < 140)) {
                    System.out.println("----Alternative: " + geo.getCity() + " / " + geo.getRegion() + " / "
                            + geo.getCountry() + " score=" + nAltPrio);
                }

                // Outcome of results:

                if (nAltPrio < nPrio) {
                    currLeader = geo;
                    nPrio = nAltPrio;
                    nCase = nAltCase;
                }
            } // end loop over alternatives

            if (null != currLeader) { // Need to change

                if (1 == nCase) {
                    this._nMovedToLargeCity++;

                    //(Cities are lower case in georef DB for some reason)
                    String city = WordUtils.capitalize(currLeader.getCity());

                    if (currLeader.getCountry_code().equals("US")) { // Special case: is this just the original?

                        String region = currLeader.getRegion();
                        if (region.equals("District of Columbia")) { // Special special case
                            region = "D.C.";
                        }
                        String sCandidate = city + ", " + region;

                        if (!sCandidate.equals(ent.getDisambiguatedName())) {
                            ent.setDisambiguatedName(sCandidate);
                            ent.setIndex(ent.getDisambiguatedName() + "/city");
                            ent.setSemanticLinks(null);
                            bChangedAnything = true;
                        } //TESTED (lots, eg "Philadelphia (village), New York" -> "Philadelphia, PA"; Wash, Ill. -> Wash DC)
                        else {
                            // The "winner" is what we already had: undo the move count
                            this._nMovedToLargeCity--;
                            _nStayedWithOriginal++;
                        } //TESTED ("Washington DC", "San Juan, Puerto Rico")
                    } //TESTED (see above)
                    else {
                        ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
                        ent.setIndex(ent.getDisambiguatedName() + "/city");
                        ent.setSemanticLinks(null);
                        bChangedAnything = true;
                    } //TESTED: "london, california/city to London, United Kingdom"
                } else if (2 == nCase) {
                    this._nMovedToRegion++;
                    ent.setDisambiguatedName(currLeader.getRegion() + ", " + currLeader.getCountry());
                    ent.setIndex(ent.getDisambiguatedName() + "/region");
                    ent.setSemanticLinks(null);
                    bChangedAnything = true;

                } //TESTED: "Moved madrid, new york/city to Madrid, Spain" (treats Madrid as region, like Berlin see above)
                else {
                    //(Cities are lower case in georef DB for some reason)
                    String city = WordUtils.capitalize(currLeader.getCity());

                    this._nMovedToForeignCity++;
                    ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
                    ent.setIndex(ent.getDisambiguatedName() + "/city");
                    ent.setSemanticLinks(null);
                    bChangedAnything = true;

                } //TESTED: "Moved geneva, new york/city to Geneva, Switzerland"

                // (semantic links are nulled on every actual move, so this only fires for moved entities
                //  or ones that had no links to begin with)
                if ((_nDebugLevel >= 1) && (null == ent.getSemanticLinks())) {
                    System.out.println("++++ Moved " + pair.getKey() + " to " + ent.getDisambiguatedName());
                }
            } else {
                _nStayedWithOriginal++;
            }

        } // (if sitting tenant not holder)

    } // (end loop over candidates)

    if ((_nDebugLevel >= 1) && bChangedAnything) {
        System.out.println("\t(((Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + doc.getUrl() + ")))");
    }

    return bChangedAnything;
}

From source file:com.ikanow.infinit.e.harvest.extraction.document.file.InternalInfiniteFile.java

License:Open Source License

/**
 * Builds a file view over either an Infinit.e share or the output of a custom job,
 * selected by the URL prefix (INFINITE_SHARE_PREFIX or INFINITE_CUSTOM_PREFIX).
 *
 * Shares: the share object is looked up by id; zip media types are exposed as a
 * directory backed by a GridFS zip view, anything else as a single file. Reference
 * shares (those with a document location) are rejected.
 *
 * Custom jobs: the job is looked up by id or, if the location is not a valid ObjectId,
 * by job title; the canonical URL is rebuilt as prefix + id + "/" + title + "/", and
 * the file time is taken from the last (append mode) or first record of the output
 * collection, falling back to the job's last run time.
 *
 * Access is granted if the community id string from auth.getDomain() contains one of
 * the object's community ids, or if the user from auth.getUsername() is an admin.
 *
 * @param url  location of the share or custom job
 * @param auth carrier for the caller's identity: getDomain() supplies the community
 *             id string and getUsername() the user's ObjectId string
 * @throws MalformedURLException if the URL is not recognized, the object is missing,
 *             the caller is not authorized, or any other error occurs while resolving
 *             it (the underlying exception is attached as the cause)
 */
public InternalInfiniteFile(String url, NtlmPasswordAuthentication auth) throws MalformedURLException {
    try {
        ObjectId locationId = null;

        ObjectId ownerId = null;
        String communityIdsStr = null;

        if (url.startsWith(INFINITE_SHARE_PREFIX)) {
            _isShare = true;
            locationId = new ObjectId(url.substring(INFINITE_SHARE_PREFIX_LEN).replaceFirst("/.*$", "")); // remove trailing /s, can be used for information
            //TESTED (2.1, 2.2.1, 2.3)

            BasicDBObject query = new BasicDBObject(SharePojo._id_, locationId);
            _resultObj = (BasicDBObject) MongoDbManager.getSocial().getShare().findOne(query);
            if (null == _resultObj) {
                throw new MalformedURLException("Not found (or not authorized): " + url);
            } //TESTED (7.1)
            String mediaType = (String) _resultObj.get(SharePojo.mediaType_);
            if ((null != mediaType) && (mediaType.equalsIgnoreCase("application/x-zip-compressed")
                    || mediaType.equalsIgnoreCase("application/zip"))) {
                _isDirectory = true;
                ObjectId fileId = _resultObj.getObjectId(SharePojo.binaryId_);

                GridFSRandomAccessFile file = new GridFSRandomAccessFile(
                        MongoDbManager.getSocial().getShareBinary(), fileId);
                _zipView = new GridFSZipFile(_resultObj.getString(SharePojo.title_), file);
            } //TESTED (3.1)
            else { // Single share
                if (_resultObj.containsField(SharePojo.documentLocation_)) {
                    throw new MalformedURLException("Reference shares are not currently supported");
                } //TESTED (0.1)

                _isDirectory = false; // (this will get traversed as the initial "directory", which doesn't check isDirectory...
                //... and will return itself as a single file in the "directory")
            } //TESTED (1.1, 2.1, 3.1)
        } //TESTED
        else if (url.startsWith(INFINITE_CUSTOM_PREFIX)) {
            _isCustom = true;
            _isDirectory = true;
            BasicDBObject query = null;
            String locationStr = url.substring(INFINITE_CUSTOM_PREFIX_LEN).replaceFirst("/.*$", "");
            StringBuilder sb = new StringBuilder(INFINITE_CUSTOM_PREFIX);
            try {
                locationId = new ObjectId(locationStr);
                query = new BasicDBObject(CustomMapReduceJobPojo._id_, locationId);
            } //TESTED (4.1)
            catch (Exception e) { // for custom jobs can also specify the job name
                query = new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, locationStr);
            } //TESTED (5.1, 6.1)
            _resultObj = (BasicDBObject) MongoDbManager.getCustom().getLookup().findOne(query);
            if (null == _resultObj) {
                throw new MalformedURLException("Not found (or not authorized): " + url);
            } //TESTED (7.2, 7.3)
            // Canonical form is always <prefix><id>/<title>/, whichever of the two was supplied
            if (null != locationId) {
                sb.append(locationStr).append('/')
                        .append(_resultObj.getString(CustomMapReduceJobPojo.jobtitle_)).append('/');
            } //TESTED (5.1, 6.1)
            else {
                sb.append(_resultObj.getObjectId(CustomMapReduceJobPojo._id_).toString()).append('/')
                        .append(locationStr).append('/');
            } //TESTED (4.1)
            _originalUrl = sb.toString();
            _isCustomAppend = _resultObj.getBoolean(CustomMapReduceJobPojo.appendResults_, false);

            String outputDatabase = _resultObj.getString(CustomMapReduceJobPojo.outputDatabase_);
            String outputCollection = _resultObj.getString(CustomMapReduceJobPojo.outputCollection_);
            if (null == outputDatabase) {
                outputDatabase = "custommr";
            }
            DBCollection outColl = MongoDbManager.getCollection(outputDatabase, outputCollection);
            BasicDBObject sort = null;
            if (_isCustomAppend) { // Use time of _last_ record as file time
                sort = new BasicDBObject("_id", -1);
            } else { // Use time of _first_ record as file time
                sort = new BasicDBObject("_id", 1);
            } //TESTED
            DBCursor dbc = outColl.find().sort(sort).limit(1);
            List<DBObject> firstOrLastRecordList = dbc.toArray();
            if (!firstOrLastRecordList.isEmpty()) {
                _overwriteTime = ((ObjectId) firstOrLastRecordList.iterator().next().get("_id")).getTime();
            } else { // No records, use lastRunTime_ as backup
                _overwriteTime = _resultObj.getDate(CustomMapReduceJobPojo.lastRunTime_, new Date()).getTime();
            } //TOTEST

        } //TESTED
        else {
            throw new MalformedURLException("Not recognized: " + url);
        } //TESTED (7.4)
        communityIdsStr = auth.getDomain();
        ownerId = new ObjectId(auth.getUsername());

        // Now do some authentication:
        // Check communities first since that involves no external DB queries:
        boolean isAuthorized = false;
        if (_isShare) {
            BasicDBList communities = (BasicDBList) _resultObj.get(SharePojo.communities_);
            for (Object communityObj : communities) {
                BasicDBObject communityDbo = (BasicDBObject) communityObj;
                ObjectId commId = communityDbo.getObjectId("_id");
                if (communityIdsStr.contains(commId.toString())) {
                    isAuthorized = true;
                    break;
                }
            }
        } //TESTED (7.*)
        else { //_isCustom
            BasicDBList communities = (BasicDBList) _resultObj.get(CustomMapReduceJobPojo.communityIds_);
            for (Object communityObj : communities) {
                ObjectId commId = (ObjectId) communityObj;
                // Same check as the share branch above. (Comparing the String against the ObjectId
                // with equals() can never be true, which locked out every non-admin user.)
                if (communityIdsStr.contains(commId.toString())) {
                    isAuthorized = true;
                    break;
                }
            }
        } //TESTED (7.*)
        if (!isAuthorized) { // Still OK ... only if user is an admin
            isAuthorized = AuthUtils.isAdmin(ownerId);
        } //TESTED (1,2,3,4,5,6)
        if (!isAuthorized) { // Permission fail
            throw new MalformedURLException("Not found (or not authorized): " + url);
        } //TESTED (7.5)
    } catch (Exception e) {
        // MalformedURLException has no cause-taking constructor, so attach the cause explicitly
        MalformedURLException wrapped = new MalformedURLException(
                "Invalid authentication or location: " + e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } //(just passed exceptions on)
      // Save original URL
    if (_isShare) { // (custom handled above)
        if (!url.endsWith("/")) {
            _originalUrl = url + "/";
        } else {
            _originalUrl = url;
        }

    } //(TESTED 1.3, 2.3, 3.3)

}

From source file:com.ikanow.infinit.e.processing.generic.GenericProcessingController.java

License:Open Source License

/**
 * Creates the Elasticsearch indexes this controller depends on: the association ("event") and
 * entity feature indexes (skipped when aggregation is disabled in the properties), the global and
 * "many geo" document indexes, two hardwired dummy community indexes and, when a rebuild or delete
 * is requested, one document index per community read from the social database.
 *
 * @param bDeleteDocs passed through to every createCommunityDocIndex call; also forces the
 *        per-community loop to run
 * @param bDeleteEntityFeature if true the entity feature index is deleted and then recreated
 * @param bDeleteEventFeature if true the association feature index is deleted and then recreated
 * @param bRebuildDocsIndex if true the per-community loop runs; it is also switched on internally
 *        when the main document index cannot be pinged or lacks its "collection" alias
 * @throws RuntimeException if the initial pingIndex check fails or any step throws; the message is
 *         that of the underlying exception, which is attached as the cause
 */
public void InitializeIndex(boolean bDeleteDocs, boolean bDeleteEntityFeature, boolean bDeleteEventFeature,
        boolean bRebuildDocsIndex) {

    // Remembers an interrupt received during the robustness sleep so it can be restored for the
    // caller once this method is finished (rather than being silently discarded)
    boolean bInterrupted = false;

    try { //create elasticsearch indexes

        if (!ElasticSearchManager.pingIndex(null, null)) {
            throw new RuntimeException("Index is red, disable indexing operations");
        } //TESTED

        PropertiesManager pm = new PropertiesManager();

        if (!pm.getAggregationDisabled()) {

            boolean languageNormalization = pm.getNormalizeEncoding();

            Builder localSettingsEvent = ImmutableSettings.settingsBuilder();
            localSettingsEvent.put("number_of_shards", 10).put("number_of_replicas", 2);
            localSettingsEvent.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
            if (languageNormalization) {
                localSettingsEvent.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "icu_normalizer",
                        "icu_folding", "standard", "lowercase");
            } else {
                localSettingsEvent.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard",
                        "lowercase");
            }

            Builder localSettingsGaz = ImmutableSettings.settingsBuilder();
            localSettingsGaz.put("number_of_shards", 10).put("number_of_replicas", 2);
            localSettingsGaz.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
            if (languageNormalization) {
                localSettingsGaz.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "icu_normalizer",
                        "icu_folding", "standard", "lowercase");
            } else {
                localSettingsGaz.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard",
                        "lowercase");
            }

            //event feature
            String eventGazMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(),
                    AssociationFeaturePojoIndexMap.Mapping.class);
            ElasticSearchManager eventIndex = IndexManager.createIndex(
                    AssociationFeaturePojoIndexMap.indexName_, null, false, null, eventGazMapping,
                    localSettingsEvent);
            if (null == eventIndex) { // (if has been previously referenced in this process space)
                eventIndex = IndexManager.getIndex(AssociationFeaturePojoIndexMap.indexName_);
            }
            eventIndex.createAlias(AssociationFeaturePojoIndexMap.indexCollectionName_);
            if (bDeleteEventFeature) {
                eventIndex.deleteMe();
                eventIndex = IndexManager.createIndex(AssociationFeaturePojoIndexMap.indexName_, null, false,
                        null, eventGazMapping, localSettingsEvent);
            }
            //entity feature
            String gazMapping = new Gson().toJson(new EntityFeaturePojoIndexMap.Mapping(),
                    EntityFeaturePojoIndexMap.Mapping.class);
            ElasticSearchManager entityIndex = IndexManager.createIndex(EntityFeaturePojoIndexMap.indexName_,
                    null, false, null, gazMapping, localSettingsGaz);
            if (null == entityIndex) { // (if has been previously referenced in this process space)
                entityIndex = IndexManager.getIndex(EntityFeaturePojoIndexMap.indexName_);
            }
            entityIndex.createAlias(EntityFeaturePojoIndexMap.indexCollectionName_);
            if (bDeleteEntityFeature) {
                entityIndex.deleteMe();
                entityIndex = IndexManager.createIndex(EntityFeaturePojoIndexMap.indexName_, null, false, null,
                        gazMapping, localSettingsGaz);
            }
        }

        //DOCS - much more complicated than anything else 

        boolean bPingMainIndexFailed = !ElasticSearchManager
                .pingIndex(DocumentPojoIndexMap.globalDocumentIndex_);
        // (ie if main doc index doesn't exist then always rebuild all indexes)

        if (bPingMainIndexFailed) { // extra level of robustness... sleep for a minute then double check the index is really missing...
            try {
                Thread.sleep(60000);
            } catch (InterruptedException ie) {
                // Carry on with the double check straight away, but remember the interrupt so
                // the caller still gets to see it (restored in the finally block below)
                bInterrupted = true;
            }
            bPingMainIndexFailed = !ElasticSearchManager.pingIndex(DocumentPojoIndexMap.globalDocumentIndex_);
        }
        bRebuildDocsIndex |= bPingMainIndexFailed;

        // check the main index has the "collection" alias - if not then rebuild everything

        if (!bPingMainIndexFailed && (null == _aliasInfo)) {
            ElasticSearchManager docIndex = ElasticSearchManager
                    .getIndex(DocumentPojoIndexMap.globalDocumentIndex_);
            ClusterStateResponse clusterState = docIndex.getRawClient().admin().cluster()
                    .state(new ClusterStateRequest()).actionGet();
            _aliasInfo = CrossVersionImmutableMapOfImmutableMaps
                    .getAliases(clusterState.getState().getMetaData());
            if (!_aliasInfo.containsKey(DocumentPojoIndexMap.globalDocumentIndexCollection_)) {
                bRebuildDocsIndex = true;
            }
        } //TESTED

        createCommunityDocIndex(DocumentPojoIndexMap.globalDocumentIndex_, null, false, true, bDeleteDocs);
        createCommunityDocIndex(DocumentPojoIndexMap.manyGeoDocumentIndex_, null, false, false, bDeleteDocs);

        // Some hardwired dummy communities
        createCommunityDocIndex("4e3706c48d26852237078005", null, true, false, bDeleteDocs); // (admin)
        createCommunityDocIndex("4e3706c48d26852237079004", null, true, false, bDeleteDocs); // (test user)
        // (create dummy index used to keep personal group aliases)

        if (bRebuildDocsIndex || bDeleteDocs) {

            // OK, going to have different shards for different communities:
            // Get a list of all the communities:

            BasicDBObject query = new BasicDBObject();
            BasicDBObject fieldsToDrop = new BasicDBObject("members", 0);
            fieldsToDrop.put("communityAttributes", 0);
            fieldsToDrop.put("userAttributes", 0);
            DBCursor dbc = DbManager.getSocial().getCommunity().find(query, fieldsToDrop);

            List<DBObject> tmparray = dbc.toArray(); // (brings the entire thing into memory so don't get cursor timeouts)
            int i = 0;
            // (report the size of the in-memory list: it is exactly what gets iterated below, and
            //  avoids a second count round trip to the database)
            System.out.println("Initializing " + tmparray.size() + " indexes:");
            // Two passes over the same list; the final createCommunityDocIndex argument is true
            // only on the first pass (presumably "parents only" - see createCommunityDocIndex).
            // NB the progress counter keeps counting across both passes.
            for (int j = 0; j < 2; ++j) {
                for (DBObject dbotmp : tmparray) {
                    if ((++i % 100) == 0) {
                        System.out.println("Initialized " + i + " indexes.");
                    }
                    BasicDBObject dbo = (BasicDBObject) dbotmp;

                    // OK, going to see if there are any sources with this group id, create a new index if so:
                    // (Don't use CommunityPojo data model here for performance reasons....
                    //  (Also, haven't gotten round to porting CommunityPojo field access to using static fields))
                    ObjectId communityId = (ObjectId) dbo.get("_id");
                    boolean bPersonalGroup = dbo.getBoolean("isPersonalCommunity", false);
                    boolean bSystemGroup = dbo.getBoolean("isSystemCommunity", false);
                    ObjectId parentCommunityId = (ObjectId) dbo.get("parentId");

                    createCommunityDocIndex(communityId.toString(), parentCommunityId, bPersonalGroup,
                            bSystemGroup, bDeleteDocs, j == 0);

                } //end loop over communities
            } // end loop over communities - first time parents only
        } // (end if need to do big loop over all sources)
    } catch (Exception e) {
        //DEBUG
        //e.printStackTrace();

        // Same exception type and message as before, but keep the original as the cause so the
        // real stack trace is not lost
        throw new RuntimeException(e.getMessage(), e);
    } finally {
        if (bInterrupted) {
            Thread.currentThread().interrupt();
        }
    }
}

From source file:com.ikanow.infinit.e.utility.MongoEntityFeatureTxfer.java

License:Apache License

/**
 * Manual test scaffold: reads entity features from the "entity" collection of the "feature"
 * database, fills in a default community id where one is missing and collects the index key of
 * every entity that has an alias list. The Elasticsearch steps are kept as commented-out snippets
 * to be enabled by hand as needed.
 *
 * @param sMongoDbHost MongoDB host name
 * @param sMongoDbPort MongoDB port, parsed as an integer
 * @param sElasticHost only referenced by the commented-out Elasticsearch snippets
 * @param sElasticPort only referenced by the commented-out Elasticsearch snippets
 * @param query filter applied to the entity collection
 * @param nLimit maximum number of entities to read; values of zero or less read everything
 */
@SuppressWarnings("unused")
private void doUnitTestCode(String sMongoDbHost, String sMongoDbPort, String sElasticHost, String sElasticPort,
        BasicDBObject query, int nLimit) {
    Mongo mongoDB = null;
    ElasticSearchManager elasticManager = null;

    try {
        // Initialize the DB:

        int nMongoDbPort = Integer.parseInt(sMongoDbPort);
        mongoDB = new Mongo(sMongoDbHost, nMongoDbPort);
        DBCollection entityCollection = mongoDB.getDB("feature").getCollection("entity");

        // Initialize the ES (create the index if it doesn't already):

        // 1. Set-up the entity feature index 

        String indexName = "entity_index";

        //TEST: delete the index:
        //         elasticManager = ElasticSearchManager.getIndex(indexName, sElasticHost + ":" + sElasticPort);
        //         elasticManager.deleteMe();

        //TEST: create the index
        //         String sMapping = new Gson().toJson(new GazateerPojo.Mapping(), GazateerPojo.Mapping.class);
        //         Builder localSettings = ImmutableSettings.settingsBuilder();
        //         localSettings.put("number_of_shards", 1).put("number_of_replicas", 0);
        //         elasticManager = ElasticSearchManager.createIndex
        //                        (indexName, false, 
        //                              sElasticHost + ":" + sElasticPort, 
        //                              sMapping, localSettings);

        //TEST: delete the index:
        //         elasticManager.deleteMe();

        //TEST: get the index:
        //         elasticManager = ElasticSearchManager.getIndex(indexName, sElasticHost + ":" + sElasticPort);

        // Now query the DB (capped only when a positive limit was supplied):

        DBCursor cursor = entityCollection.find(query);
        if (nLimit > 0) {
            cursor = cursor.limit(nLimit);
        }

        // Round-trip the result list through its string form to get typed POJOs
        String entitiesJson = cursor.toArray().toString();
        Type entityListType = new TypeToken<ArrayList<EntityFeaturePojo>>() {
        }.getType();
        List<EntityFeaturePojo> entities = new Gson().fromJson(entitiesJson, entityListType);

        //Debug:
        List<String> entIds = new LinkedList<String>();

        // Loop over array and invoke the cleansing function for each one

        for (EntityFeaturePojo entity : entities) {

            if (null == entity.getAlias()) { // (some corrupt gazateer entry)
                continue;
            }

            //Debug:
            //System.out.println("entity=" + entity.getGazateerIndex());
            //System.out.println("aliases=" + Arrays.toString(entity.getAlias().toArray()));

            // Insert into the elasticsearch index

            //Debug:
            //System.out.println(new Gson().toJson(entity, GazateerPojo.class));

            // Handle groups (system group is: "4c927585d591d31d7b37097a")
            if (null == entity.getCommunityId()) {
                entity.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));
            }

            //TEST: index document
            //               entity.synchronizeWithIndex();
            //               boolean b = elasticManager.addDocument(entity, entity.getGazateerIndex(), true);

            //TEST: remove document
            //b = elasticManager.removeDocument(entity.getGazateerIndex());

            //TEST: (part of get, bulk add/delete)
            entIds.add(entity.getIndex());

            // Debug:
            //               if (!b) {
            //                  System.out.println("Didn't add " + entity.getGazateerIndex());
            //               }

        } // End loop over entities

        //TEST: bulk delete
        //elasticManager.bulkAddDocuments(entities, "index", null);
        //elasticManager.bulkDeleteDocuments(entIds);

        //TEST: get document
        //         elasticManager.getRawClient().admin().indices().refresh(Requests.refreshRequest(indexName)).actionGet();
        //         for (String id: entIds) {
        //            Map<String, GetField> results = elasticManager.getDocument(id,"doccount", "disambiguated_name");
        //            System.out.println(id + ": " + results.get("doccount").values().get(0) + " , " + results.get("disambiguated_name").values().get(0));
        //         }

        //TEST: search
        //         elasticManager.getRawClient().admin().indices().refresh(Requests.refreshRequest(indexName)).actionGet();
        //         SearchRequestBuilder searchOptions = elasticManager.getSearchOptions();
        //         XContentQueryBuilder queryObj = QueryBuilders.matchAllQuery();
        //         searchOptions.addSort("doccount", SortOrder.DESC);
        //         searchOptions.addFields("doccount", "type");
        //         SearchResponse rsp = elasticManager.doQuery(queryObj, searchOptions);
        //         SearchHit[] docs = rsp.getHits().getHits();
        //         for (SearchHit hit: docs) {
        //            String id = hit.getId();
        //            Long doccount = (Long) hit.field("doccount").value();
        //            String type = (String) hit.field("type").value();
        //            System.out.println(id + ": " + doccount + ", " + type);
        //         }

    } catch (NumberFormatException e) {
        e.printStackTrace();
    } catch (UnknownHostException e) {
        e.printStackTrace();
    } catch (MongoException e) {
        e.printStackTrace();
    } finally {

        if (null != mongoDB) {
            mongoDB.close();
        }
        if (null != elasticManager) {
            //NB not sure when exactly to call this - probably can just not bother?
            //elasticManager.getRawClient().close();
        }
    }
}

From source file:com.liferay.mongodb.hook.service.impl.MongoExpandoRowLocalServiceImpl.java

License:Open Source License

/**
 * Loads the rows of an expando table from its MongoDB collection, optionally restricted to a
 * page of results.
 *
 * @param expandoTable table whose backing collection is queried
 * @param start index of the first row of the page
 * @param end index just past the last row of the page; paging is applied only when neither
 *        {@code start} nor {@code end} is {@code QueryUtil.ALL_POS}
 * @return the matching documents converted with {@code toExpandoRow}, in cursor order
 */
protected List<ExpandoRow> getRows(ExpandoTable expandoTable, int start, int end) {

    DBCollection dbCollection = MongoDBUtil.getCollection(expandoTable);

    DBCursor dbCursor = dbCollection.find(new BasicDBObject());

    if ((start != QueryUtil.ALL_POS) && (end != QueryUtil.ALL_POS)) {
        // Skip to the start of the page, then cap the page size. (Calling limit() twice, as this
        // used to, just overwrites the first limit and always returns rows from offset 0.)
        dbCursor = dbCursor.skip(start).limit(end - start);
    }

    List<ExpandoRow> expandoRows = new ArrayList<ExpandoRow>();

    for (DBObject expandoRowDBObject : dbCursor.toArray()) {
        ExpandoRow expandoRow = toExpandoRow(expandoRowDBObject, expandoTable);

        expandoRows.add(expandoRow);
    }

    return expandoRows;
}

From source file:com.liferay.mongodb.hook.service.impl.MongoExpandoValueLocalServiceImpl.java

License:Open Source License

/**
 * Returns the values stored for one expando column, optionally filtered by data and paged.
 *
 * @param companyId company used to look up the column and the collection
 * @param classNameId class name id used to look up the column and the collection
 * @param tableName expando table name
 * @param columnName expando column name; also the document field that is filtered on
 * @param data when it passes {@code Validator.isNotNull}, only documents whose column field
 *        equals the converted form of this data are returned; otherwise all documents are
 * @param start index of the first result of the page
 * @param end index just past the last result of the page; paging is applied only when neither
 *        {@code start} nor {@code end} is {@code QueryUtil.ALL_POS}
 * @return the matching documents converted with {@code toExpandoValue}, in cursor order
 * @throws SystemException wrapping any {@code PortalException} raised along the way
 */
@Override
public List<ExpandoValue> getColumnValues(long companyId, long classNameId, String tableName, String columnName,
        String data, int start, int end) {

    try {
        ExpandoColumn column = ExpandoColumnLocalServiceUtil.getColumn(companyId, classNameId, tableName,
                columnName);

        DBCollection collection = MongoDBUtil.getCollection(companyId, classNameId, tableName);

        DBCursor cursor;

        if (Validator.isNotNull(data)) {
            // Wrap the raw data in a throwaway value so getData can convert it for this column
            ExpandoValue probe = ExpandoValueUtil.create(0);

            probe.setColumnId(column.getColumnId());
            probe.setData(data);

            DBObject filter = new BasicDBObject();

            filter.put(columnName, getData(column, probe));

            cursor = collection.find(filter);
        } else {
            cursor = collection.find();
        }

        boolean paged = (start != QueryUtil.ALL_POS) && (end != QueryUtil.ALL_POS);

        if (paged) {
            cursor = cursor.skip(start).limit(end - start);
        }

        List<DBObject> documents = cursor.toArray();

        List<ExpandoValue> values = new ArrayList<ExpandoValue>();

        for (DBObject document : documents) {
            values.add(toExpandoValue((BasicDBObject) document, column));
        }

        return values;
    } catch (PortalException pe) {
        throw new SystemException(pe);
    }
}

From source file:com.linuxbox.enkive.workspace.mongo.MongoSearchResultUtils.java

License:Open Source License

/**
 * Returns the given message ids ordered by a field of the message documents.
 *
 * @param messageIds ids of the messages to order; matched against "_id" with "$in"
 * @param sortField document field to sort on
 * @param sortDirection sort direction value handed to the cursor's sort specification
 * @return the "_id" values of the matching documents, in the order the sorted query returned
 *         them
 */
protected LinkedHashSet<String> sortMessages(Set<String> messageIds, String sortField, int sortDirection) {
    // Projection: only the id and the sort key are needed
    BasicDBObject projection = new BasicDBObject("_id", 1);
    projection.put(sortField, 1);

    // Filter: _id must be one of the supplied ids
    BasicDBList wantedIds = new BasicDBList();
    wantedIds.addAll(messageIds);
    BasicDBObject filter = new BasicDBObject("_id", new BasicDBObject("$in", wantedIds));

    // Ordering
    BasicDBObject ordering = new BasicDBObject(sortField, sortDirection);

    DBCursor sortedCursor = messageColl.find(filter, projection).sort(ordering);

    LinkedHashSet<String> orderedIds = new LinkedHashSet<String>();
    for (DBObject match : sortedCursor.toArray()) {
        orderedIds.add((String) match.get("_id"));
    }
    return orderedIds;
}

From source file:com.linuxbox.enkive.workspace.mongo.MongoSearchResultUtils.java

License:Open Source License

/**
 * Returns the given search result ids ordered by a field of the search result documents.
 *
 * @param searchResultIds ids of the search results to order; each is converted with
 *        {@code ObjectId.massageToObjectId} and matched against "_id" with "$in"
 * @param sortField document field to sort on
 * @param sortDirection sort direction value handed to the cursor's sort specification
 * @return the string form of the "_id" of each matching document, in the order the sorted query
 *         returned them
 */
public List<String> sortSearchResults(Collection<String> searchResultIds, String sortField, int sortDirection) {
    // Projection: only the id and the sort key are needed
    BasicDBObject projection = new BasicDBObject("_id", 1);
    projection.put(sortField, 1);

    // Filter: _id must be one of the supplied ids (as ObjectIds)
    BasicDBList wantedIds = new BasicDBList();
    for (String rawId : searchResultIds) {
        wantedIds.add(ObjectId.massageToObjectId(rawId));
    }
    BasicDBObject filter = new BasicDBObject("_id", new BasicDBObject("$in", wantedIds));

    // Ordering
    BasicDBObject ordering = new BasicDBObject(sortField, sortDirection);

    DBCursor sortedCursor = searchResultColl.find(filter, projection).sort(ordering);

    ArrayList<String> orderedIds = new ArrayList<String>();
    for (DBObject match : sortedCursor.toArray()) {
        ObjectId matchId = (ObjectId) match.get("_id");
        orderedIds.add(matchId.toString());
    }

    return orderedIds;
}