List of usage examples for com.mongodb DBCursor toArray
public List<DBObject> toArray()
From source file:com.ikanow.infinit.e.api.config.source.SourceHandler.java
License:Open Source License
/** * getPendingSources/*from w ww.j a v a2 s . c o m*/ * Get a list of sources pending approval for a list of one or more * community IDs passed via the communityid parameter * @param communityIdStrList * @return */ public ResponsePojo getPendingSources(String userIdStr, String communityIdStrList, boolean bStrip) { ResponsePojo rp = new ResponsePojo(); try { String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList); ObjectId userId = null; boolean bAdmin = RESTTools.adminLookup(userIdStr); if (!bAdmin) { userId = new ObjectId(userIdStr); // (ie not admin, may not see } Set<ObjectId> communityIdSet = new TreeSet<ObjectId>(); Set<ObjectId> ownedOrModeratedCommunityIdSet = new TreeSet<ObjectId>(); for (String s : communityIdStrs) { ObjectId communityId = new ObjectId(s); communityIdSet.add(communityId); if (null != userId) { if (isOwnerOrModerator(communityId.toString(), userIdStr)) { ownedOrModeratedCommunityIdSet.add(communityId); } } } //TESTED (owner and community owner, public and not public) // Set up the query BasicDBObject query = new BasicDBObject(); query.put(SourcePojo.isApproved_, false); query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, communityIdSet)); BasicDBObject fields = new BasicDBObject(); if (bStrip) { setStrippedFields(fields); } DBCursor dbc = DbManager.getIngest().getSource().find(query, fields); // Remove communityids we don't want the user to see: if (bStrip && sanityCheckStrippedSources(dbc.toArray(), bAdmin)) { rp.setData(dbc.toArray(), (BasePojoApiMap<DBObject>) null); } else { rp.setData(SourcePojo.listFromDb(dbc, SourcePojo.listType()), new SourcePojoApiMap(userId, communityIdSet, ownedOrModeratedCommunityIdSet)); } rp.setResponse(new ResponseObject("Pending Sources", true, "successfully returned pending sources")); } catch (Exception e) { // If an exception occurs log the error logger.error("Exception Message: " + e.getMessage(), e); rp.setResponse(new ResponseObject("Pending Sources", false, "error returning pending sources")); } return rp; }
From source file:com.ikanow.infinit.e.api.config.source.SourceHandler.java
License:Open Source License
/** * getUserSources/*from w ww . ja va2 s .com*/ * @param userIdStr * @param userId * @return */ public ResponsePojo getUserSources(String userIdStr, boolean bStrip) { ResponsePojo rp = new ResponsePojo(); try { boolean bAdmin = RESTTools.adminLookup(userIdStr); HashSet<ObjectId> userCommunities = SocialUtils.getUserCommunities(userIdStr); DBCursor dbc = null; BasicDBObject query = new BasicDBObject(); query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, userCommunities)); BasicDBObject fields = new BasicDBObject(); if (bStrip) { setStrippedFields(fields); } Set<ObjectId> ownedOrModeratedCommunityIdSet = null; if (!bAdmin) { ownedOrModeratedCommunityIdSet = new TreeSet<ObjectId>(); for (ObjectId communityId : userCommunities) { if (isOwnerOrModerator(communityId.toString(), userIdStr)) { ownedOrModeratedCommunityIdSet.add(communityId); } } } // Get all sources for admins if (bAdmin) { dbc = DbManager.getIngest().getSource().find(query, fields); } // Get only sources the user owns or owns/moderates the parent community else { query.put(SourcePojo.ownerId_, new ObjectId(userIdStr)); BasicDBObject query2 = new BasicDBObject(); query2.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, ownedOrModeratedCommunityIdSet)); dbc = DbManager.getIngest().getSource() .find(new BasicDBObject(MongoDbManager.or_, Arrays.asList(query, query2)), fields); } if (bStrip && sanityCheckStrippedSources(dbc.toArray(), bAdmin)) { rp.setData(dbc.toArray(), (BasePojoApiMap<DBObject>) null); } else { rp.setData(SourcePojo.listFromDb(dbc, SourcePojo.listType()), new SourcePojoApiMap(null, userCommunities, null)); } rp.setResponse(new ResponseObject("User's Sources", true, "successfully returned user's sources")); } catch (Exception e) { // If an exception occurs log the error logger.error("Exception Message: " + e.getMessage(), e); rp.setResponse(new ResponseObject("User's Sources", false, "error returning user's sources")); } return rp; }
From source file:com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi.AlchemyEntityGeoCleanser.java
License:Open Source License
public boolean cleanseGeoInDocu(DocumentPojo doc) { boolean bChangedAnything = false; Map<String, Candidate> dubiousLocations = new HashMap<String, Candidate>(); Set<String> otherRegions = new HashSet<String>(); Set<String> otherCountries = new HashSet<String>(); Set<String> otherCountriesOrRegionsReferenced = new HashSet<String>(); //Debug//from w w w . j a v a2 s .co m if (_nDebugLevel >= 2) { System.out.println( "+++++++ Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + doc.getEntities().size()); } // 1] First off, let's find anything location-based and also determine if it's bad or not if (null != doc.getEntities()) for (EntityPojo ent : doc.getEntities()) { boolean bStrongCandidate = false; // People: decompose names if (EntityPojo.Dimension.Where == ent.getDimension()) { // So locations get disambiguated to one of: // "<city-etc>, <region-or-country>", or "<region-or-country>" // though can also just be left as they are. String sActualName = ent.getActual_name().toLowerCase(); if (!ent.getDisambiguatedName().toLowerCase().equals(sActualName)) { // It's been disambiguated //Debug if (_nDebugLevel >= 3) { System.out.println("disambiguous candidate: " + ent.getDisambiguatedName() + " VS " + ent.getActual_name() + " (" + ((null != ent.getSemanticLinks()) ? ent.getSemanticLinks().size() : 0) + ")"); } // OK next step, is it a disambiguation to a US town? String splitMe[] = ent.getDisambiguatedName().split(", "); if (2 == splitMe.length) { String stateOrCountry = splitMe[1]; Matcher m = _statesRegex.matcher(stateOrCountry); if (m.find()) { // This is a US disambiguation - high risk case // Short cut if state is already directly mentioned? stateOrCountry = stateOrCountry.toLowerCase(); if (!otherRegions.contains(stateOrCountry)) { // See list below - no need to go any further // OK next step - is it a possible ambiguity: ArrayList<BasicDBObject> x = new ArrayList<BasicDBObject>(); BasicDBObject inner0_0 = new BasicDBObject(MongoDbManager.not_, Pattern.compile("US")); BasicDBObject inner1_0 = new BasicDBObject("country_code", inner0_0); x.add(inner1_0); BasicDBObject inner0_1 = new BasicDBObject(MongoDbManager.gte_, 400000); BasicDBObject inner1_1 = new BasicDBObject("population", inner0_1); x.add(inner1_1); BasicDBObject dbo = new BasicDBObject(); dbo.append("search_field", sActualName); dbo.append(MongoDbManager.or_, x); DBCursor dbc = _georefDB.find(dbo); if (dbc.size() >= 1) { // Problems! //Create list of candidates Type listType = new TypeToken<LinkedList<GeoFeaturePojo>>() { }.getType(); LinkedList<GeoFeaturePojo> grpl = new Gson() .fromJson(dbc.toArray().toString(), listType); //Debug if (_nDebugLevel >= 2) { System.out.println("\tERROR CANDIDATE: " + ent.getDisambiguatedName() + " VS " + ent.getActual_name() + " (" + dbc.count() + ")"); if (_nDebugLevel >= 3) { for (GeoFeaturePojo grp : grpl) { System.out.println("\t\tCandidate:" + grp.getCity() + " / " + grp.getRegion() + " / " + grp.getCountry()); } } } Candidate candidate = new Candidate(ent, grpl, stateOrCountry); dubiousLocations.put(ent.getIndex(), candidate); bStrongCandidate = true; } // if strong candidate } //TESTED ("reston, virginia" after "virginia/stateorcounty" mention) // (end if can't shortcut past all this) } // end if a US town } // end if in the format "A, B" } // if weak candidate //TESTED if (!bStrongCandidate) { // Obv can't count on a disambiguous candidate: String type = ent.getType().toLowerCase(); if (type.equals("stateorcounty")) { String disName = ent.getDisambiguatedName().toLowerCase(); if (_abbrStateRegex.matcher(disName).matches()) { otherRegions.add(getStateFromAbbr(disName)); } else { otherRegions.add(ent.getDisambiguatedName().toLowerCase()); } otherCountriesOrRegionsReferenced.add("united states"); } //TESTED: "mich./stateorcounty" else if (type.equals("country")) { String disName = ent.getDisambiguatedName().toLowerCase(); // Translation of known badly transcribed countries: // (England->UK) if (disName.equals("england")) { otherCountries.add("united kingdom"); } //TESTED else { otherCountries.add(ent.getDisambiguatedName().toLowerCase()); } } else if (type.equals("region")) { otherRegions.add(ent.getDisambiguatedName().toLowerCase()); } else if (type.equals("city")) { String splitMe[] = ent.getDisambiguatedName().split(",\\s*"); if (2 == splitMe.length) { otherCountriesOrRegionsReferenced.add(splitMe[1].toLowerCase()); if (this._statesRegex.matcher(splitMe[1]).find()) { otherCountriesOrRegionsReferenced.add("united states"); } //TESTED: "lexingon, kentucky/city" } } } //TESTED: just above clauses } // if location } // (end loop over entities) // Debug: if ((_nDebugLevel >= 3) && (!dubiousLocations.isEmpty())) { for (String s : otherRegions) { System.out.println("Strong region: " + s); } for (String s : otherCountries) { System.out.println("Strong countries: " + s); } for (String s : otherCountriesOrRegionsReferenced) { System.out.println("Weak regionscountries: " + s); } } // 2] The requirements and algorithm are discussed in // http://ikanow.jira.com/wiki/display/INF/Beta...+improving+AlchemyAPI+extraction+%28geo%29 // Canonical cases: // Darfur -> Darfur, MN even though Sudan and sometimes Darfur, Sudan are present // Shanghai -> Shanghai, WV even though China is mentioned (and not WV) // Manchester -> Manchester village, NY (not Manchester, UK) // Philadelphia -> Philadelphia (village), NY (though NY is mentioned and not PA) // We're generating the following order // 10] Sitting tenant with strong direct // 15] Large city with strong direct // 20] Region with direct // 30] Large city with strong indirect // 40] Sitting tenant with strong indirect // 50] Region with indirect // 60] Another foreign possibility with strong direct // 70] Large city with weak direct // 72] Large city with weak indirect // 75] Large city with no reference // 78] Another foreign possibility with strong indirect (>100K population - ie not insignificant) // 80] Sitting tenant with any weak (US) direct or indirect // 90] Another foreign possibility with strong indirect // 100] Another foreign possibility with weak direct // 110] Another foreign possibility with weak indirect // 120] Region with no reference, if there is only 1 // 130] Sitting tenant with none of the above (ie default) // 140] Anything else! for (Map.Entry<String, Candidate> pair : dubiousLocations.entrySet()) { EntityPojo ent = pair.getValue().entity; Candidate candidate = pair.getValue(); // 2.1] Let's analyse the "sitting tenant" int nPrio = 130; GeoFeaturePojo currLeader = null; int nCase = 0; // (just for debugging, 0=st, 1=large city, 2=region, 3=other) if (otherRegions.contains(candidate.state)) { // Strong direct ref, winner! nPrio = 10; // winner! } //TESTED: "san antonio, texas/city" vs "texas" else if (otherCountriesOrRegionsReferenced.contains(candidate.state)) { // Indirect ref nPrio = 40; // good, but beatable... } //TESTED: "philadelphia (village), new york/city" else if (otherCountries.contains("united states")) { // Weak direct ref nPrio = 80; // better than nothing... } //TESTED: "apache, oklahoma/city" else if (otherCountriesOrRegionsReferenced.contains("united states")) { // Weak indirect ref nPrio = 80; // better than nothing... } //TESTED: "washington, d.c." have DC as stateorcounty, but US in countries list // Special case: we don't like "village": if ((80 != nPrio) && ent.getDisambiguatedName().contains("village") && !ent.getActual_name().contains("village")) { nPrio = 80; } //TESTED: "Downvoted: Philadelphia (village), New York from Philadelphia" // Debug if (_nDebugLevel >= 2) { System.out.println(pair.getKey() + " SittingTenantScore=" + nPrio); } // Alternatives if (nPrio > 10) { LinkedList<GeoFeaturePojo> geos = pair.getValue().candidates; for (GeoFeaturePojo geo : geos) { int nAltPrio = 140; int nAltCase = -1; String city = (null != geo.getCity()) ? geo.getCity().toLowerCase() : null; String region = (null != geo.getRegion()) ? geo.getRegion().toLowerCase() : null; String country = (null != geo.getCountry()) ? geo.getCountry().toLowerCase() : null; // 2.2] CASE 1: I'm a city with pop > 1M (best score 15) // 15] Large city with strong direct // 30] Large city with strong indirect // 70] Large city with weak direct // 72] Large city with weak indirect // 75] Large city with no reference if ((null != city) && (geo.getPopulation() >= 400000) && (nPrio > 15)) { nAltCase = 1; if ((null != region) && (otherRegions.contains(region))) { nAltPrio = 15; // strong direct } //TESTED: "dallas / Texas / United States = 15" else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) { nAltPrio = 30; // strong indirect } //TESTED: "sacramento / California / United State" else if ((null != country) && (otherCountries.contains(country))) { nAltPrio = 70; // weak direct } //TESTED: "berlin, germany", with "germany" directly mentioned else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) { nAltPrio = 72; // weak indirect } //TESTED: "los angeles / California / United States = 72" else { nAltPrio = 75; // just for being big! } //TESTED: "barcelona, spain" } // 2.3] CASE 2: I'm a region (best score=20, can beat current score) // 20] Region with direct // 50] Region with indirect // 120] Region with no reference, if there is only 1 else if ((null == city) && (nPrio > 20)) { nAltCase = 2; if ((null != country) && (otherCountries.contains(country))) { nAltPrio = 20; // strong direct } //TESTED: (region) "Berlin, Germany" with "Germany" mentioned else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) { nAltPrio = 50; // strong indirect } //(haven't seen, but we'll live) else { nAltPrio = 120; // (just for being there) } //TESTED: "null / Portland / Jamaica = 120", also "Shanghai / China" } // 2.4] CASE 3: I'm any foreign possibility (best score=60) // 60] Another foreign possibility with strong direct // 78] Another foreign possibility with strong indirect (>100K population - ie not insignificant) // 90] Another foreign possibility with strong indirect // 100] Another foreign possibility with weak direct // 110] Another foreign possibility with weak indirect else if (nPrio > 60) { nAltCase = 3; if ((null != region) && (otherRegions.contains(region))) { nAltPrio = 60; // strong direct // Double check we're not falling into the trap below: if (!geo.getCountry_code().equals("US")) { Matcher m = this._statesRegex.matcher(geo.getRegion()); if (m.matches()) { // non US state matching against (probably) US state, disregard) nAltPrio = 140; } } //TESTED (same clause as below) } //TESTED: lol "philadelphia / Maryland / Liberia = 60" (before above extra clause) if (nAltPrio > 60) { // (may need to re-run test) if ((null != country) && (otherCountries.contains(country))) { if (geo.getPopulation() < 100000) { nAltPrio = 90; // strong indirect } //TESTED: "washington / Villa Clara / Cuba" else { nAltPrio = 78; // strong indirect, with boost! } //TESTED: "geneva, Geneve, Switzerland", pop 180K } else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) { nAltPrio = 100; // weak direct } //TESTED: "lincoln / Lincolnshire / United Kingdom = 100" else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) { nAltPrio = 110; // weak indirect } //(haven't seen, but we'll live) } } // Debug: if ((_nDebugLevel >= 2) && (nAltPrio < 140)) { System.out.println("----Alternative: " + geo.getCity() + " / " + geo.getRegion() + " / " + geo.getCountry() + " score=" + nAltPrio); } // Outcome of results: if (nAltPrio < nPrio) { currLeader = geo; nPrio = nAltPrio; nCase = nAltCase; } } // end loop over alternativse if (null != currLeader) { // Need to change if (1 == nCase) { this._nMovedToLargeCity++; //(Cities are lower case in georef DB for some reason) String city = WordUtils.capitalize(currLeader.getCity()); if (currLeader.getCountry_code().equals("US")) { // Special case: is this just the original? String region = currLeader.getRegion(); if (region.equals("District of Columbia")) { // Special special case region = "D.C."; } String sCandidate = city + ", " + region; if (!sCandidate.equals(ent.getDisambiguatedName())) { ent.setDisambiguatedName(sCandidate); ent.setIndex(ent.getDisambiguatedName() + "/city"); ent.setSemanticLinks(null); bChangedAnything = true; } //TESTED (lots, eg "Philadelphia (village), New York" -> "Philadelphia, PA"; Wash, Ill. -> Wash DC) else { this._nMovedToLargeCity--; _nStayedWithOriginal++; } //TESTED ("Washington DC", "San Juan, Puerto Rico") } //TESTED (see above) else { ent.setDisambiguatedName(city + ", " + currLeader.getCountry()); ent.setIndex(ent.getDisambiguatedName() + "/city"); ent.setSemanticLinks(null); bChangedAnything = true; } //TESTED: "london, california/city to London, United Kingdom" } else if (2 == nCase) { this._nMovedToRegion++; ent.setDisambiguatedName(currLeader.getRegion() + ", " + currLeader.getCountry()); ent.setIndex(ent.getDisambiguatedName() + "/region"); ent.setSemanticLinks(null); bChangedAnything = true; } //TESTED: "Moved madrid, new york/city to Madrid, Spain" (treats Madrid as region, like Berlin see above) else { //(Cities are lower case in georef DB for some reason) String city = WordUtils.capitalize(currLeader.getCity()); this._nMovedToForeignCity++; ent.setDisambiguatedName(city + ", " + currLeader.getCountry()); ent.setIndex(ent.getDisambiguatedName() + "/city"); ent.setSemanticLinks(null); bChangedAnything = true; } //TESTED: "Moved geneva, new york/city to Geneva, Switzerland" if ((_nDebugLevel >= 1) && (null == ent.getSemanticLinks())) { System.out.println("++++ Moved " + pair.getKey() + " to " + ent.getDisambiguatedName()); } } else { _nStayedWithOriginal++; } } // (if sitting tenant not holder) } // (end loop over candidates) if ((_nDebugLevel >= 1) && bChangedAnything) { System.out.println("\t(((Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + doc.getUrl() + ")))"); } return bChangedAnything; }
From source file:com.ikanow.infinit.e.harvest.extraction.document.file.InternalInfiniteFile.java
License:Open Source License
public InternalInfiniteFile(String url, NtlmPasswordAuthentication auth) throws MalformedURLException { try {//from w ww . j a v a 2 s. c o m ObjectId locationId = null; ObjectId ownerId = null; String communityIdsStr = null; if (url.startsWith(INFINITE_SHARE_PREFIX)) { _isShare = true; locationId = new ObjectId(url.substring(INFINITE_SHARE_PREFIX_LEN).replaceFirst("/.*$", "")); // remove trailing /s, can be used for information //TESTED (2.1, 2.2.1, 2.3) BasicDBObject query = new BasicDBObject(SharePojo._id_, locationId); _resultObj = (BasicDBObject) MongoDbManager.getSocial().getShare().findOne(query); if (null == _resultObj) { throw new MalformedURLException("Not found (or not authorized): " + url); } //TESTED (7.1) String mediaType = (String) _resultObj.get(SharePojo.mediaType_); if ((null != mediaType) && (mediaType.equalsIgnoreCase("application/x-zip-compressed") || mediaType.equalsIgnoreCase("application/zip"))) { _isDirectory = true; ObjectId fileId = _resultObj.getObjectId(SharePojo.binaryId_); GridFSRandomAccessFile file = new GridFSRandomAccessFile( MongoDbManager.getSocial().getShareBinary(), fileId); _zipView = new GridFSZipFile(_resultObj.getString(SharePojo.title_), file); } //TESTED (3.1) else { // Single share if (_resultObj.containsField(SharePojo.documentLocation_)) { throw new MalformedURLException("Reference shares are not currently supported"); } //TESTED (0.1) _isDirectory = false; // (this will get traversed as the initial "directory", which doesn't check isDirectory... //... and will return itself as a single file in the "directory") } //TESTED (1.1, 2.1, 3.1) } //TESTED else if (url.startsWith(INFINITE_CUSTOM_PREFIX)) { _isCustom = true; _isDirectory = true; BasicDBObject query = null; String locationStr = url.substring(INFINITE_CUSTOM_PREFIX_LEN).replaceFirst("/.*$", ""); StringBuffer sb = new StringBuffer(INFINITE_CUSTOM_PREFIX); try { locationId = new ObjectId(locationStr); query = new BasicDBObject(CustomMapReduceJobPojo._id_, locationId); } //TESTED (4.1) catch (Exception e) { // for custom jobs can also specify the job name query = new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, locationStr); } //TESTED (5.1, 6.1) _resultObj = (BasicDBObject) MongoDbManager.getCustom().getLookup().findOne(query); if (null == _resultObj) { throw new MalformedURLException("Not found (or not authorized): " + url); } //TESTED (7.2, 7.3) if (null != locationId) { sb.append(locationStr).append('/') .append(_resultObj.getString(CustomMapReduceJobPojo.jobtitle_)).append('/'); } //TESTED (5.1, 6.1) else { sb.append(_resultObj.getObjectId(CustomMapReduceJobPojo._id_).toString()).append('/') .append(locationStr).append('/'); } //TESTED (4.1) _originalUrl = sb.toString(); _isCustomAppend = _resultObj.getBoolean(CustomMapReduceJobPojo.appendResults_, false); String outputDatabase = _resultObj.getString(CustomMapReduceJobPojo.outputDatabase_); String outputCollection = _resultObj.getString(CustomMapReduceJobPojo.outputCollection_); if (null == outputDatabase) { outputDatabase = "custommr"; } DBCollection outColl = MongoDbManager.getCollection(outputDatabase, outputCollection); BasicDBObject sort = null; if (_isCustomAppend) { // Use time of _last_ record as file time sort = new BasicDBObject("_id", -1); } else { // Use time of _first_ record as file time sort = new BasicDBObject("_id", 1); } //TESTED DBCursor dbc = outColl.find().sort(sort).limit(1); List<DBObject> firstOrLastRecordList = dbc.toArray(); if (!firstOrLastRecordList.isEmpty()) { _overwriteTime = ((ObjectId) firstOrLastRecordList.iterator().next().get("_id")).getTime(); } else { // No records, use lastRunTime_ as backup _overwriteTime = _resultObj.getDate(CustomMapReduceJobPojo.lastRunTime_, new Date()).getTime(); } //TOTEST } //TESTED else { throw new MalformedURLException("Not recognized: " + url); } //TESTED (7.4) communityIdsStr = auth.getDomain(); ownerId = new ObjectId(auth.getUsername()); // Now do some authentication: // Check communities first since that involves no external DB queries: boolean isAuthorized = false; if (_isShare) { BasicDBList communities = (BasicDBList) _resultObj.get(SharePojo.communities_); for (Object communityObj : communities) { BasicDBObject communityDbo = (BasicDBObject) communityObj; ObjectId commId = communityDbo.getObjectId("_id"); if (communityIdsStr.contains(commId.toString())) { isAuthorized = true; break; } } } //TESTED (7.*) else { //_isCustom BasicDBList communities = (BasicDBList) _resultObj.get(CustomMapReduceJobPojo.communityIds_); for (Object communityObj : communities) { ObjectId commId = (ObjectId) communityObj; if (communityIdsStr.equals(commId)) { isAuthorized = true; break; } } } //TESTED (7.*) if (!isAuthorized) { // Still OK ... only if user is an admin isAuthorized = AuthUtils.isAdmin(ownerId); } //TESTED (1,2,3,4,5,6) if (!isAuthorized) { // Permission fail throw new MalformedURLException("Not found (or not authorized): " + url); } //TESTED (7.5) } catch (Exception e) { throw new MalformedURLException("Invalid authentication or location: " + e.getMessage()); } //(just passed exceptions on) // Save original URL if (_isShare) { // (custom handled above) if (!url.endsWith("/")) { _originalUrl = url + "/"; } else { _originalUrl = url; } } //(TESTED 1.3, 2.3, 3.3) }
From source file:com.ikanow.infinit.e.processing.generic.GenericProcessingController.java
License:Open Source License
public void InitializeIndex(boolean bDeleteDocs, boolean bDeleteEntityFeature, boolean bDeleteEventFeature, boolean bRebuildDocsIndex) { try { //create elasticsearch indexes if (!ElasticSearchManager.pingIndex(null, null)) { throw new RuntimeException("Index is red, disable indexing operations"); } //TESTED PropertiesManager pm = new PropertiesManager(); if (!pm.getAggregationDisabled()) { boolean languageNormalization = pm.getNormalizeEncoding(); Builder localSettingsEvent = ImmutableSettings.settingsBuilder(); localSettingsEvent.put("number_of_shards", 10).put("number_of_replicas", 2); localSettingsEvent.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); if (languageNormalization) { localSettingsEvent.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "icu_normalizer", "icu_folding", "standard", "lowercase"); } else { localSettingsEvent.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); }/*from ww w .j a va 2s .c o m*/ Builder localSettingsGaz = ImmutableSettings.settingsBuilder(); localSettingsGaz.put("number_of_shards", 10).put("number_of_replicas", 2); localSettingsGaz.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); if (languageNormalization) { localSettingsGaz.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "icu_normalizer", "icu_folding", "standard", "lowercase"); } else { localSettingsGaz.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); } //event feature String eventGazMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(), AssociationFeaturePojoIndexMap.Mapping.class); ElasticSearchManager eventIndex = IndexManager.createIndex( AssociationFeaturePojoIndexMap.indexName_, null, false, null, eventGazMapping, localSettingsEvent); if (null == eventIndex) { // (if has been previously referenced in this process space) eventIndex = IndexManager.getIndex(AssociationFeaturePojoIndexMap.indexName_); } eventIndex.createAlias(AssociationFeaturePojoIndexMap.indexCollectionName_); if (bDeleteEventFeature) { eventIndex.deleteMe(); eventIndex = IndexManager.createIndex(AssociationFeaturePojoIndexMap.indexName_, null, false, null, eventGazMapping, localSettingsEvent); } //entity feature String gazMapping = new Gson().toJson(new EntityFeaturePojoIndexMap.Mapping(), EntityFeaturePojoIndexMap.Mapping.class); ElasticSearchManager entityIndex = IndexManager.createIndex(EntityFeaturePojoIndexMap.indexName_, null, false, null, gazMapping, localSettingsGaz); if (null == entityIndex) { // (if has been previously referenced in this process space) entityIndex = IndexManager.getIndex(EntityFeaturePojoIndexMap.indexName_); } entityIndex.createAlias(EntityFeaturePojoIndexMap.indexCollectionName_); if (bDeleteEntityFeature) { entityIndex.deleteMe(); entityIndex = IndexManager.createIndex(EntityFeaturePojoIndexMap.indexName_, null, false, null, gazMapping, localSettingsGaz); } } //DOCS - much more complicated than anything else boolean bPingMainIndexFailed = !ElasticSearchManager .pingIndex(DocumentPojoIndexMap.globalDocumentIndex_); // (ie if main doc index doesn't exist then always rebuild all indexes) if (bPingMainIndexFailed) { // extra level of robustness... sleep for a minute then double check the index is really missing... try { Thread.sleep(60000); } catch (Exception e) { } bPingMainIndexFailed = !ElasticSearchManager.pingIndex(DocumentPojoIndexMap.globalDocumentIndex_); } bRebuildDocsIndex |= bPingMainIndexFailed; // check the main index has the "collection" alias - if not then rebuild everything if (!bPingMainIndexFailed && (null == _aliasInfo)) { ElasticSearchManager docIndex = ElasticSearchManager .getIndex(DocumentPojoIndexMap.globalDocumentIndex_); ClusterStateResponse clusterState = docIndex.getRawClient().admin().cluster() .state(new ClusterStateRequest()).actionGet(); _aliasInfo = CrossVersionImmutableMapOfImmutableMaps .getAliases(clusterState.getState().getMetaData()); if (!_aliasInfo.containsKey(DocumentPojoIndexMap.globalDocumentIndexCollection_)) { bRebuildDocsIndex = true; } } //TESTED createCommunityDocIndex(DocumentPojoIndexMap.globalDocumentIndex_, null, false, true, bDeleteDocs); createCommunityDocIndex(DocumentPojoIndexMap.manyGeoDocumentIndex_, null, false, false, bDeleteDocs); // Some hardwired dummy communities createCommunityDocIndex("4e3706c48d26852237078005", null, true, false, bDeleteDocs); // (admin) createCommunityDocIndex("4e3706c48d26852237079004", null, true, false, bDeleteDocs); // (test user) // (create dummy index used to keep personal group aliases) if (bRebuildDocsIndex || bDeleteDocs) { // OK, going to have different shards for different communities: // Get a list of all the communities: BasicDBObject query = new BasicDBObject(); BasicDBObject fieldsToDrop = new BasicDBObject("members", 0); fieldsToDrop.put("communityAttributes", 0); fieldsToDrop.put("userAttributes", 0); DBCursor dbc = DbManager.getSocial().getCommunity().find(query, fieldsToDrop); List<DBObject> tmparray = dbc.toArray(); // (brings the entire thing into memory so don't get cursor timeouts) int i = 0; System.out.println("Initializing " + dbc.size() + " indexes:"); for (int j = 0; j < 2; ++j) { for (DBObject dbotmp : tmparray) { if ((++i % 100) == 0) { System.out.println("Initialized " + i + " indexes."); } BasicDBObject dbo = (BasicDBObject) dbotmp; // OK, going to see if there are any sources with this group id, create a new index if so: // (Don't use CommunityPojo data model here for performance reasons.... // (Also, haven't gotten round to porting CommunityPojo field access to using static fields)) ObjectId communityId = (ObjectId) dbo.get("_id"); boolean bPersonalGroup = dbo.getBoolean("isPersonalCommunity", false); boolean bSystemGroup = dbo.getBoolean("isSystemCommunity", false); ObjectId parentCommunityId = (ObjectId) dbo.get("parentId"); createCommunityDocIndex(communityId.toString(), parentCommunityId, bPersonalGroup, bSystemGroup, bDeleteDocs, j == 0); } //end loop over communities } // end loop over communities - first time parents only } // (end if need to do big loop over all sources) } catch (Exception e) { //DEBUG //e.printStackTrace(); throw new RuntimeException(e.getMessage()); } }
From source file:com.ikanow.infinit.e.utility.MongoEntityFeatureTxfer.java
License:Apache License
@SuppressWarnings("unused") private void doUnitTestCode(String sMongoDbHost, String sMongoDbPort, String sElasticHost, String sElasticPort, BasicDBObject query, int nLimit) { Mongo mongoDB = null;/*from ww w . j a va 2 s . c o m*/ ElasticSearchManager elasticManager = null; try { // Initialize the DB: mongoDB = new Mongo(sMongoDbHost, Integer.parseInt(sMongoDbPort)); DBCollection gazDB = mongoDB.getDB("feature").getCollection("entity"); // Initialize the ES (create the index if it doesn't already): // 1. Set-up the entity feature index String indexName = "entity_index"; //TEST: delete the index: // elasticManager = ElasticSearchManager.getIndex(indexName, sElasticHost + ":" + sElasticPort); // elasticManager.deleteMe(); //TEST: create the index // String sMapping = new Gson().toJson(new GazateerPojo.Mapping(), GazateerPojo.Mapping.class); // Builder localSettings = ImmutableSettings.settingsBuilder(); // localSettings.put("number_of_shards", 1).put("number_of_replicas", 0); q // elasticManager = ElasticSearchManager.createIndex // (indexName, false, // sElasticHost + ":" + sElasticPort, // sMapping, localSettings); //TEST: delete the index: // elasticManager.deleteMe(); //TEST: get the index: // elasticManager = ElasticSearchManager.getIndex(indexName, sElasticHost + ":" + sElasticPort); // Now query the DB: DBCursor dbc = null; if (nLimit > 0) { dbc = gazDB.find(query).limit(nLimit); } else { // Everything! dbc = gazDB.find(query); } Type listType = new TypeToken<ArrayList<EntityFeaturePojo>>() { }.getType(); List<EntityFeaturePojo> entities = new Gson().fromJson(dbc.toArray().toString(), listType); //Debug: List<String> entIds = new LinkedList<String>(); // Loop over array and invoke the cleansing function for each one for (EntityFeaturePojo ent : entities) { if (null != ent.getAlias()) { // (some corrupt gazateer entry) //Debug: //System.out.println("entity=" + ent.getGazateerIndex()); //System.out.println("aliases=" + Arrays.toString(ent.getAlias().toArray())); // Insert into the elasticsearch index //Debug: //System.out.println(new Gson().toJson(ent, GazateerPojo.class)); // Handle groups (system group is: "4c927585d591d31d7b37097a") if (null == ent.getCommunityId()) { ent.setCommunityId(new ObjectId("4c927585d591d31d7b37097a")); } //TEST: index documemt // ent.synchronizeWithIndex(); // boolean b = elasticManager.addDocument(ent, ent.getGazateerIndex(), true); //TEST: remove document //b = elasticManager.removeDocument(ent.getGazateerIndex()); //TEST: (part of get, bulk add/delete) entIds.add(ent.getIndex()); // Debug: // if (!b) { // System.out.println("Didn't add " + ent.getGazateerIndex()); // } } } // End loop over entities //TEST: bulk delete //elasticManager.bulkAddDocuments(entities, "index", null); //elasticManager.bulkDeleteDocuments(entIds); //TEST: get document // elasticManager.getRawClient().admin().indices().refresh(Requests.refreshRequest(indexName)).actionGet(); // for (String id: entIds) { // Map<String, GetField> results = elasticManager.getDocument(id,"doccount", "disambiguated_name"); // System.out.println(id + ": " + results.get("doccount").values().get(0) + " , " + results.get("disambiguated_name").values().get(0)); // } //TEST: search // elasticManager.getRawClient().admin().indices().refresh(Requests.refreshRequest(indexName)).actionGet(); // SearchRequestBuilder searchOptions = elasticManager.getSearchOptions(); // XContentQueryBuilder queryObj = QueryBuilders.matchAllQuery(); // searchOptions.addSort("doccount", SortOrder.DESC); // searchOptions.addFields("doccount", "type"); // SearchResponse rsp = elasticManager.doQuery(queryObj, searchOptions); // SearchHit[] docs = rsp.getHits().getHits(); // for (SearchHit hit: docs) { // String id = hit.getId(); // Long doccount = (Long) hit.field("doccount").value(); // String type = (String) hit.field("type").value(); // System.out.println(id + ": " + doccount + ", " + type); // } } catch (NumberFormatException e) { e.printStackTrace(); } catch (UnknownHostException e) { e.printStackTrace(); } catch (MongoException e) { e.printStackTrace(); } finally { if (null != mongoDB) { mongoDB.close(); } if (null != elasticManager) { //NB not sure when exactly to call this - probably can just not bother? //elasticManager.getRawClient().close(); } } }
From source file:com.liferay.mongodb.hook.service.impl.MongoExpandoRowLocalServiceImpl.java
License:Open Source License
protected List<ExpandoRow> getRows(ExpandoTable expandoTable, int start, int end) { DBCollection dbCollection = MongoDBUtil.getCollection(expandoTable); DBCursor dbCursor = dbCollection.find(new BasicDBObject()); if ((start != QueryUtil.ALL_POS) && (end != QueryUtil.ALL_POS)) { dbCursor.limit(start);//from ww w . j av a 2 s . c o m dbCursor.limit(end - start); } List<ExpandoRow> expandoRows = new ArrayList<ExpandoRow>(); for (DBObject expandoRowDBObject : dbCursor.toArray()) { ExpandoRow expandoRow = toExpandoRow(expandoRowDBObject, expandoTable); expandoRows.add(expandoRow); } return expandoRows; }
From source file:com.liferay.mongodb.hook.service.impl.MongoExpandoValueLocalServiceImpl.java
License:Open Source License
@Override public List<ExpandoValue> getColumnValues(long companyId, long classNameId, String tableName, String columnName, String data, int start, int end) { try {//w w w . ja v a 2s . com ExpandoColumn expandoColumn = ExpandoColumnLocalServiceUtil.getColumn(companyId, classNameId, tableName, columnName); DBCollection dbCollection = MongoDBUtil.getCollection(companyId, classNameId, tableName); DBCursor dbCursor = null; if (Validator.isNotNull(data)) { DBObject queryDBObject = new BasicDBObject(); ExpandoValue expandoValue = ExpandoValueUtil.create(0); expandoValue.setColumnId(expandoColumn.getColumnId()); expandoValue.setData(data); queryDBObject.put(columnName, getData(expandoColumn, expandoValue)); dbCursor = dbCollection.find(queryDBObject); } else { dbCursor = dbCollection.find(); } if ((start != QueryUtil.ALL_POS) && (end != QueryUtil.ALL_POS)) { dbCursor = dbCursor.skip(start).limit(end - start); } List<ExpandoValue> expandoValues = new ArrayList<ExpandoValue>(); for (DBObject dbObject : dbCursor.toArray()) { BasicDBObject expandoValueDBObject = (BasicDBObject) dbObject; ExpandoValue expandoValue = toExpandoValue(expandoValueDBObject, expandoColumn); expandoValues.add(expandoValue); } return expandoValues; } catch (PortalException pe) { throw new SystemException(pe); } }
From source file:com.linuxbox.enkive.workspace.mongo.MongoSearchResultUtils.java
License:Open Source License
protected LinkedHashSet<String> sortMessages(Set<String> messageIds, String sortField, int sortDirection) { LinkedHashSet<String> sortedIds = new LinkedHashSet<String>(); // Only want to return the ids BasicDBObject keys = new BasicDBObject(); keys.put("_id", 1); keys.put(sortField, 1);/*from w ww . jav a 2s . c om*/ BasicDBObject query = new BasicDBObject(); // Build object with IDs BasicDBList idList = new BasicDBList(); idList.addAll(messageIds); BasicDBObject idQuery = new BasicDBObject(); idQuery.put("$in", idList); query.put("_id", idQuery); // Add sort query DBCursor results = messageColl.find(query, keys); BasicDBObject orderBy = new BasicDBObject(); orderBy.put(sortField, sortDirection); results = results.sort(orderBy); for (DBObject result : results.toArray()) sortedIds.add((String) result.get("_id")); return sortedIds; }
From source file:com.linuxbox.enkive.workspace.mongo.MongoSearchResultUtils.java
License:Open Source License
public List<String> sortSearchResults(Collection<String> searchResultIds, String sortField, int sortDirection) { ArrayList<String> sortedIds = new ArrayList<String>(); // Only want to return the ids BasicDBObject keys = new BasicDBObject(); keys.put("_id", 1); keys.put(sortField, 1);/*from w ww . jav a2 s . c om*/ BasicDBObject query = new BasicDBObject(); // Build object with IDs BasicDBList idList = new BasicDBList(); for (String Id : searchResultIds) idList.add(ObjectId.massageToObjectId(Id)); BasicDBObject idQuery = new BasicDBObject(); idQuery.put("$in", idList); query.put("_id", idQuery); // Add sort query DBCursor results = searchResultColl.find(query, keys); BasicDBObject orderBy = new BasicDBObject(); orderBy.put(sortField, sortDirection); results = results.sort(orderBy); for (DBObject result : results.toArray()) sortedIds.add(((ObjectId) result.get("_id")).toString()); return sortedIds; }