List of usage examples for com.mongodb DBCursor skip
public DBCursor skip(final int numberOfElements)
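Before the project examples below, here is a minimal, self-contained sketch of skip-based paging with the legacy com.mongodb driver. The host/port, database name ("test"), collection name ("users"), and page values are illustrative assumptions, not taken from any of the sources listed.

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

public class SkipExample {
    public static void main(String[] args) throws Exception {
        // Connection details are placeholders for illustration only
        MongoClient mongo = new MongoClient("localhost", 27017);
        DBCollection users = mongo.getDB("test").getCollection("users");

        int pageNumber = 3; // 1-based page index (assumed for this sketch)
        int pageSize = 20;

        // skip((page - 1) * pageSize) + limit(pageSize) implements simple page-based paging;
        // the cursor is only configured here - nothing is fetched until iteration starts
        DBCursor cursor = users.find(new BasicDBObject())
                .sort(new BasicDBObject("_id", 1))
                .skip((pageNumber - 1) * pageSize)
                .limit(pageSize);
        try {
            while (cursor.hasNext()) {
                DBObject doc = cursor.next();
                System.out.println(doc);
            }
        } finally {
            cursor.close();
        }
        mongo.close();
    }
}

Note that skip() still walks over the skipped documents on the server, so very large offsets become progressively slower; for deep paging, a range query on an indexed field is the usual alternative.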
From source file:com.cyslab.craftvm.rest.mongo.QueryServlet.java
License:GNU General Public License
@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
    log.trace("doPost()");

    // server auth
    if (!can_read(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    String ret = null;
    InputStream is = req.getInputStream();
    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }
    String skip = req.getParameter("skip");
    String limit = req.getParameter("limit");
    String _fields = req.getParameter("fields");
    String fields[] = null;
    if (_fields != null)
        fields = _fields.split("[,]");

    DB db = mongo.getDB(db_name);

    // mongo auth
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);

    StringBuilder buf = tl.get();
    // reset buf
    buf.setLength(0);

    BufferedReader r = null;
    DBObject q = null, sort = null;
    try {
        r = new BufferedReader(new InputStreamReader(is));
        String data = r.readLine();
        if (data == null) {
            error(res, SC_BAD_REQUEST, Status.get("no data"));
            return;
        }
        try {
            q = (DBObject) JSON.parse(data);
            if (cache != null) {
                buf.append(db_name);
                buf.append(col_name);
                buf.append(data);
            }
        } catch (JSONParseException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
            return;
        }
        // sort param
        data = r.readLine();
        if (data != null) {
            try {
                sort = (DBObject) JSON.parse(data);
            } catch (JSONParseException e) {
                error(res, SC_BAD_REQUEST, Status.get("can not parse sort arg"));
                return;
            }
            if (cache != null)
                buf.append(data);
        }
    } finally {
        if (r != null)
            r.close();
    }

    // limit
    int lim;
    if (limit != null) {
        try {
            lim = Math.min(Integer.parseInt(limit), MAX_FIELDS_TO_RETURN);
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse limit"));
            return;
        }
    } else {
        lim = MAX_FIELDS_TO_RETURN;
    }
    if (cache != null) {
        buf.append(";limit=");
        buf.append(lim);
    }

    // skip
    int sk = -1;
    if (skip != null) {
        try {
            sk = Integer.parseInt(skip);
            if (cache != null) {
                buf.append(";skip=");
                buf.append(sk);
            }
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse skip"));
            return;
        }
    }

    // fields
    if (cache != null && _fields != null) {
        buf.append(";fields=");
        buf.append(_fields);
    }

    // from cache
    String cache_key = null;
    if (cache != null) {
        cache_key = buf.toString();
        try {
            ret = (String) cache.get(cache_key);
        }
        // some wrong char in key
        catch (IllegalArgumentException e) {
            int l = buf.length();
            for (int i = 0; i < l; i++) {
                if (buf.charAt(i) == ' ')
                    buf.setCharAt(i, '*');
            }
            cache_key = buf.toString();
            ret = (String) cache.get(cache_key);
        }
        if (ret != null) {
            out_str(req, ret, "application/json");
            return;
        }
    }

    // cursor
    DBCursor c;
    if (fields != null) {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        int len = fields.length;
        for (int i = 0; i < len; i++) {
            String s = fields[i];
            sb.append('"');
            sb.append(s);
            sb.append('"');
            sb.append(":1");
            if (i != (len - 1))
                sb.append(",");
        }
        sb.append("}");
        c = col.find(q, (DBObject) JSON.parse(sb.toString()));
    } else
        c = col.find(q);

    if (c == null || c.count() == 0) {
        error(res, SC_NOT_FOUND, Status.get("no documents found"));
        return;
    }

    if (sort != null)
        c.sort(sort);

    res.setIntHeader("X-Documents-Count", c.count());

    c.limit(lim);
    if (sk != -1)
        c.skip(sk);

    // reset buf
    buf.setLength(0);
    int no = 0;
    buf.append("[");
    while (c.hasNext()) {
        DBObject o = c.next();
        if (rm_id)
            o.removeField("_id");
        JSON.serialize(o, buf);
        buf.append(",");
        no++;
    }
    c.close();
    if (no > 0)
        buf.setCharAt(buf.length() - 1, ']');
    else
        buf.append(']');

    res.setIntHeader("X-Documents-Returned", no);

    ret = buf.toString();
    if (cache != null)
        cache.set(cache_key, ret);
    out_str(req, ret, "application/json");
}
From source file:com.cyslab.craftvm.rest.mongo.QueryServlet.java
License:GNU General Public License
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
    log.trace("doGet()");

    if (!can_read(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }
    String skip = req.getParameter("skip");
    String limit = req.getParameter("limit");

    DB db = mongo.getDB(db_name);

    // mongo auth
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);
    DBCursor c = col.find();
    if (c == null || c.count() == 0) {
        error(res, SC_NOT_FOUND, Status.get("no documents found"));
        return;
    }

    res.setIntHeader("X-Documents-Count", c.count());

    if (limit != null) {
        try {
            c.limit(Math.min(Integer.parseInt(limit), MAX_FIELDS_TO_RETURN));
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse limit"));
            c.close();
            return;
        }
    } else
        c.limit(MAX_FIELDS_TO_RETURN);

    if (skip != null) {
        try {
            c.skip(Integer.parseInt(skip));
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse skip"));
            c.close();
            return;
        }
    }

    StringBuilder buf = tl.get();
    buf.setLength(0);
    int no = 0;
    buf.append("[");
    while (c.hasNext()) {
        DBObject o = c.next();
        if (rm_id)
            o.removeField("_id");
        JSON.serialize(o, buf);
        buf.append(",");
        no++;
    }
    c.close();
    if (no > 0)
        buf.setCharAt(buf.length() - 1, ']');
    else
        buf.append(']');

    res.setIntHeader("X-Documents-Returned", no);
    out_str(req, buf.toString(), "application/json");
}
From source file:com.ebay.cloud.cms.dal.persistence.MongoExecutor.java
License:Apache License
public static List<DBObject> find(PersistenceContext context, MetaClass metadata, DBObject queryObject,
        DBObject fieldObject, SearchOption option) {
    long start = System.currentTimeMillis();
    String msg = "success";
    DBCollection dbCollection = context.getDBCollection(metadata);
    DBCursor findCursor = null;
    Integer size = 0;
    try {
        findCursor = dbCollection.find(queryObject, fieldObject);
        // set option
        if (option.hasLimit()) {
            findCursor.limit(option.getLimit());
        }
        if (option.hasSkip()) {
            findCursor.skip(option.getSkip());
        }
        if (option.hasSort()) {
            findCursor.sort(option.getSort());
        }
        // populate search result
        List<DBObject> result = findCursor.toArray();
        size = result.size();
        return result;
    } catch (Throwable t) {
        msg = t.getMessage();
        handleMongoException(t);
    } finally {
        if (findCursor != null) {
            findCursor.close();
        }
        logMongoAction(context, "find", start, dbCollection, queryObject, fieldObject, option, size, msg);
    }
    return Collections.emptyList();
}
From source file:com.edgytech.umongo.CollectionPanel.java
License:Apache License
private void exportToFile(final DBCollection col, final DBObject query, final DBObject fields,
        final DBObject sort, final int skip, final int limit, final int batchSize) {
    ExportDialog dia = UMongo.instance.getGlobalStore().getExportDialog();
    if (!dia.show()) {
        return;
    }
    final DocumentSerializer ds = dia.getDocumentSerializer();
    final boolean continueOnError = dia.getBooleanFieldValue(ExportDialog.Item.continueOnError);
    new DbJob() {

        @Override
        public Object doRun() throws Exception {
            try {
                try {
                    DBCursor cur = col.find(query, fields);
                    if (skip > 0) {
                        cur.skip(skip);
                    }
                    if (batchSize != 0) {
                        cur.batchSize(batchSize);
                    }
                    if (sort != null) {
                        cur.sort(sort);
                    }
                    if (limit > 0) {
                        cur.limit(limit);
                    }
                    while (cur.hasNext() && !stopped) {
                        ds.writeObject(cur.next());
                    }
                } catch (Exception e) {
                    if (continueOnError) {
                        getLogger().log(Level.WARNING, null, e);
                    } else {
                        throw e;
                    }
                }
            } finally {
                ds.close();
            }
            return null;
        }

        @Override
        public String getNS() {
            return col.getFullName();
        }

        @Override
        public String getShortName() {
            return "Export";
        }
    }.addJob();
}
From source file:com.effektif.mongo.Query.java
License:Apache License
public void applyCursorConfigs(DBCursor dbCursor) {
    if (skip != null || limit != null) {
        log.debug("Applying page to cursor: skip=" + skip + ", limit=" + limit);
        if (skip != null) {
            dbCursor.skip(skip);
        }
        if (limit != null) {
            dbCursor.limit(limit);
        }
    }
    if (orderBy != null) {
        log.debug("Applying sort to cursor: " + orderBy);
        dbCursor.sort(orderBy);
    }
}
From source file:com.google.api.ads.adwords.jaxws.extensions.report.model.persistence.mongodb.MongoEntityPersister.java
License:Open Source License
/**
 * @see com.google.api.ads.adwords.jaxws.extensions.report.model.persistence.EntityPersister
 *      #get(java.lang.Class, java.util.Map, int, int)
 */
@Override
public <T, V> List<T> get(Class<T> t, Map<String, V> keyValueList, int numToSkip, int limit) {
    DBCollection dbcollection = getDBCollection(t, true);
    BasicDBObject query = new BasicDBObject();
    if (keyValueList != null) {
        for (String key : keyValueList.keySet()) {
            query.put(key, keyValueList.get(key));
        }
    }
    DBCursor cur = dbcollection.find(query);
    if (limit > 0) {
        cur.limit(limit);
    }
    if (numToSkip > 0) {
        cur.skip(numToSkip);
    }
    List<T> list = new ArrayList<T>();
    while (cur.hasNext()) {
        DBObject dbObject = cur.next();
        list.add(gson.fromJson(com.mongodb.util.JSON.serialize(dbObject), t));
    }
    return list;
}
From source file:com.icoin.trading.webui.init.SystemController.java
License:Apache License
@RequestMapping(value = "/collection/{id}", method = RequestMethod.GET) public String collection(@PathVariable("id") String collectionName, @RequestParam(value = "page", defaultValue = "1") int pageNumber, @RequestParam(value = "itemsperpage", defaultValue = "5") int itemsPerPage, Model model) { DBCursor dbCursor = springTemplate.getCollection(collectionName).find(); List<DBObject> dbObjects = dbCursor.skip((pageNumber - 1) * itemsPerPage).limit(itemsPerPage).toArray(); List<Map> items = new ArrayList<Map>(dbCursor.length()); for (DBObject dbObject : dbObjects) { items.add(dbObject.toMap());//from w w w . j av a 2 s .c om } model.addAttribute("items", items); int totalItems = dbCursor.count(); int numPages = ((int) Math.floor(totalItems / itemsPerPage)) + 1; model.addAttribute("numPages", numPages); model.addAttribute("page", pageNumber); model.addAttribute("itemsPerPage", itemsPerPage); model.addAttribute("collectionName", collectionName); return "data/collection"; }
From source file:com.ikanow.infinit.e.utility.MongoAssociationFeatureTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) {
    ElasticSearchManager elasticManager = null;

    // Initialize the DB:
    DBCollection eventFeatureDB = DbManager.getFeature().getAssociation();

    // Initialize the ES (create the index if it doesn't already):
    // 1. Set-up the entity feature index
    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // (delete the index)
    //elasticManager = ElasticSearchManager.getIndex("association_index");
    //elasticManager.deleteMe();

    // Create the index if necessary
    String sMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(),
            AssociationFeaturePojoIndexMap.Mapping.class);
    Builder localSettings = ImmutableSettings.settingsBuilder();
    localSettings.put("number_of_shards", 1).put("number_of_replicas", 0);
    localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
    localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");
    elasticManager = ElasticSearchManager.createIndex("association_index", null, false, null, sMapping,
            localSettings);

    // Get the index (necessary if already created)
    if (null == elasticManager) {
        elasticManager = ElasticSearchManager.getIndex("association_index");
    }

    // Now query the DB:
    DBCursor dbc = null;
    dbc = eventFeatureDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    List<AssociationFeaturePojo> events = new LinkedList<AssociationFeaturePojo>();
    int nSynced = 0;

    // Loop over array and invoke the cleansing function for each one
    while (dbc.hasNext()) {
        BasicDBObject dbo = (BasicDBObject) dbc.next();
        AssociationFeaturePojo evt = AssociationFeaturePojo.fromDb(dbo, AssociationFeaturePojo.class);

        // If this table has just been rebuilt from the document then the indexes are all wrong ...
        // recalculate and save
        if ('#' == evt.getIndex().charAt(0)) {
            AssociationPojo singleEvt = new AssociationPojo();
            singleEvt.setEntity1_index(evt.getEntity1_index());
            singleEvt.setEntity2_index(evt.getEntity2_index());
            singleEvt.setVerb_category(evt.getVerb_category());
            singleEvt.setGeo_index(evt.getGeo_index());
            evt.setIndex(AssociationAggregationUtils.getEventFeatureIndex(singleEvt));
            eventFeatureDB.update(new BasicDBObject("_id", dbo.get("_id")),
                    new BasicDBObject(MongoDbManager.set_,
                            new BasicDBObject(AssociationFeaturePojo.index_, evt.getIndex())),
                    false, true);
            // (has to be a multi-update even though it's unique because it's sharded on index)
        }

        // Handle groups (system group is: "4c927585d591d31d7b37097a")
        if (null == evt.getCommunityId()) {
            evt.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));
        }

        // Bulk add prep
        events.add(evt);
        nSynced++;

        if (events.size() > 1000) {
            elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events,
                    AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null,
                    true);
            events.clear();
        }
    }
    // End loop over entities

    // write whatever's left
    elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(),
            new AssociationFeaturePojoIndexMap()), "_id", null, true);

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }
}
From source file:com.ikanow.infinit.e.utility.MongoDocumentTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, boolean bAggregate, BasicDBObject chunk)
        throws IOException {
    PropertiesManager pm = new PropertiesManager();
    int nMaxContentSize_bytes = pm.getMaxContentSize();

    // Initialize the DB:
    DBCollection docsDB = DbManager.getDocument().getMetadata();
    DBCollection contentDB = DbManager.getDocument().getContent();
    DBCollection sourcesDB = DbManager.getIngest().getSource();

    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // 1. Get the documents from the DB (combining data + metadata and refreshing source meta)
    // (Ignore soft-deleted records:)
    if (null == query) {
        query = new BasicDBObject();
    }
    Object sourceKeyQueryTerm = query.remove(DocumentPojo.sourceKey_);
    if (null != sourceKeyQueryTerm) {
        if (query.toString()
                .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException(
                    "Can't specify sourceKey as part of complex query term: " + query.toString());
        } //TESTED (by hand, "{ \"sourceKey\": \"x\", \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        if (sourceKeyQueryTerm instanceof String) {
            query.put(DocumentPojo.sourceKey_,
                    SourcePojo.getDistributedKeyQueryTerm((String) sourceKeyQueryTerm));
        } //TESTED (by hand, "{\"sourceKey\": \"feeds.arstechnica.com.arstechnica.index.11.2.\" }")
        else if (sourceKeyQueryTerm instanceof DBObject) {
            // find all the _sources_ matching this term, and convert to a big list including distribution
            BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
            fields.put(SourcePojo.highestDistributionFactorStored_, 1);
            DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.key_, sourceKeyQueryTerm), fields);
            LinkedList<String> sourceKeys = new LinkedList<String>();
            for (DBObject dbo : dbc) {
                String key = (String) dbo.get(SourcePojo.key_);
                Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_);
                Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key, distributionFactor);
                sourceKeys.addAll(sourceKeysForSource);
            }
            query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));
        } //TESTED (by hand, "{\"sourceKey\": { \"$gt\": \"dev.ikanow\" } }")
        else {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //(actually not possible, just included here for mathematical completeness...)
    } else {
        if (query.toString()
                .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //TESTED (by hand, "{ \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        // Optimize communityId into sourceKeys...
        if (null != query.get(DocumentPojo.communityId_)) {
            try {
                ObjectId commId = query.getObjectId(DocumentPojo.communityId_);
                BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
                fields.put(SourcePojo.highestDistributionFactorStored_, 1);
                DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.communityIds_, commId), fields);
                LinkedList<String> sourceKeys = new LinkedList<String>();
                int added = 0;
                for (DBObject dbo : dbc) {
                    String key = (String) dbo.get(SourcePojo.key_);
                    Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_);
                    Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key, distributionFactor);
                    sourceKeys.addAll(sourceKeysForSource);
                    added += sourceKeysForSource.size();
                }
                query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));
                System.out.println("(Optimized simple community query to " + added + " source key(s))");
            } catch (Exception e) {
                //DEBUG
                //e.printStackTrace();
                System.out.println("(Can't optimize complex community query: " + e.getMessage());
            }
        } //TESTED (by hand - including distributed source version)
    }

    // Ignored delete objects
    Object urlQuery = query.get(DocumentPojo.url_);
    if (null == urlQuery) {
        query.put(DocumentPojo.url_, Pattern.compile("^[^?]")); // (ie nothing starting with ?)
    } //TESTED
    else if (urlQuery instanceof BasicDBObject) {
        ((BasicDBObject) urlQuery).append("$regex", "^[^?]");
    } //TESTED

    //DEBUG
    //System.out.println("COMBINED QUERY= " + query.toString());

    // If aggregating, kick off the background aggregation thread
    if (bAggregate) {
        EntityBackgroundAggregationManager.startThread();
        AssociationBackgroundAggregationManager.startThread();
    }

    //Debug:
    DBCursor dbc = null;
    dbc = docsDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    byte[] storageArray = new byte[200000];

    int nSynced = 0;
    LinkedList<DocumentPojo> docsToTransfer = new LinkedList<DocumentPojo>();
    Map<ObjectId, LinkedList<DocumentPojo>> communityList = null;
    ObjectId currCommunityId = null;
    while (dbc.hasNext()) {
        BasicDBObject dbo = (BasicDBObject) dbc.next();
        DocumentPojo doc = DocumentPojo.fromDb(dbo, DocumentPojo.class);
        String sDocIndex = doc.getIndex();
        if (null == sDocIndex) {
            sDocIndex = "document_index";
        }
        if ((null != _deletedIndex) && !_deletedIndex.contains(sDocIndex)) {
            _deletedIndex.add(sDocIndex);
            rebuildIndex(sDocIndex);
            try { // (Just in case the index requires some time to sort itself out)
                Thread.sleep(1000);
            } catch (InterruptedException e) {
            }
        }

        //Debug:
        //System.out.println("Getting content..." + feed.getTitle() + " / " + feed.getUrl());

        // Get the content:
        if ((0 != nMaxContentSize_bytes)
                && StoreAndIndexManager.docHasExternalContent(doc.getUrl(), doc.getSourceUrl())) {
            BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, doc.getUrl());
            contentQ.put(CompressedFullTextPojo.sourceKey_,
                    new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, doc.getSourceKey())));
            BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1);
            fields.put(CompressedFullTextPojo.sourceKey_, 1);
            DBCursor dbcGzip = contentDB.find(contentQ, fields);
            while (dbcGzip.hasNext()) {
                BasicDBObject dboContent = (BasicDBObject) dbcGzip.next();
                if (!dboContent.containsField(CompressedFullTextPojo.sourceKey_)) {
                    // If this has another version then ignore this one...
                    if (dbc.hasNext()) {
                        continue;
                    } //TESTED (by hand)
                }
                byte[] compressedData = ((byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_));
                ByteArrayInputStream in = new ByteArrayInputStream(compressedData);
                GZIPInputStream gzip = new GZIPInputStream(in);
                int nRead = 0;
                StringBuffer output = new StringBuffer();
                while (nRead >= 0) {
                    nRead = gzip.read(storageArray, 0, 200000);
                    if (nRead > 0) {
                        String s = new String(storageArray, 0, nRead, "UTF-8");
                        output.append(s);
                    }
                }
                doc.setFullText(output.toString());
            }
        }
        // (else document has full text already)

        // Get tags, if necessary:
        // Always overwrite tags - one of the reasons we might choose to migrate
        // Also may need source in order to support source index filtering
        SourcePojo src = _sourceCache.get(doc.getSourceKey());
        if (null == src) {
            //TODO (INF-2265): handle search index settings in pipeline mode... (also didn't seem to work?)
            BasicDBObject srcDbo = (BasicDBObject) sourcesDB
                    .findOne(new BasicDBObject(SourcePojo.key_, doc.getSourceKey()));
            if (null != srcDbo) {
                src = SourcePojo.fromDb(srcDbo, SourcePojo.class);

                if (null != src.getProcessingPipeline()) {
                    try {
                        // Set the index settings
                        HarvestController hc = new HarvestController();
                        HarvestControllerPipeline hcPipe = new HarvestControllerPipeline();
                        hcPipe.extractSource_preProcessingPipeline(src, hc);
                    } catch (Exception e) {
                        //DEBUG
                        e.printStackTrace();
                    }
                } //TESTED (by hand)

                _sourceCache.put(doc.getSourceKey(), src);
            }
        }
        doc.setTempSource(src); // (needed for source index filtering)
        if (null != src) {
            if (null != src.getTags()) {
                Set<String> tagsTidied = new TreeSet<String>();
                for (String s : src.getTags()) {
                    String ss = s.trim().toLowerCase();
                    tagsTidied.add(ss);
                }

                // May also want to write this back to the DB:
                //TODO (INF-2223): Handle append tags or not in the pipeline...
                if ((null == src.getAppendTagsToDocs()) || src.getAppendTagsToDocs()) {
                    if ((null == doc.getTags()) || (doc.getTags().size() < tagsTidied.size())) {
                        BasicDBObject updateQuery = new BasicDBObject(DocumentPojo.sourceKey_,
                                doc.getRawSourceKey()); // (ie including the # if there is one)
                        updateQuery.put(DocumentPojo._id_, doc.getId());
                        docsDB.update(updateQuery,
                                new BasicDBObject(DbManager.addToSet_, new BasicDBObject(DocumentPojo.tags_,
                                        new BasicDBObject(DbManager.each_, tagsTidied))));
                    }
                    doc.setTags(tagsTidied); // (just copy ptr across)
                }
            }
        }

        // 2. Update the index with the new document
        // (Optionally also update entity and assoc features)
        if (bAggregate) {
            if (null == currCommunityId) {
                currCommunityId = doc.getCommunityId();
            } else if (!currCommunityId.equals(doc.getCommunityId())) {
                LinkedList<DocumentPojo> perCommunityDocList = null;
                if (null == communityList) { // (very first time we see > 1 community)
                    communityList = new TreeMap<ObjectId, LinkedList<DocumentPojo>>();
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    perCommunityDocList.addAll(docsToTransfer);
                    //(NOT including doc, this hasn't been added to docsToTransfer yet)
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                currCommunityId = doc.getCommunityId();
                perCommunityDocList = communityList.get(currCommunityId);
                if (null == perCommunityDocList) {
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                perCommunityDocList.add(doc);
            }
        } //TESTED

        nSynced++;
        docsToTransfer.add(doc);
        if (0 == (nSynced % 10000)) {
            StoreAndIndexManager manager = new StoreAndIndexManager();

            if (bAggregate) {
                // Loop over communities and aggregate each one then store the modified entities/assocs
                doAggregation(communityList, docsToTransfer);
                communityList = null; // (in case the next 10,000 docs are all in the same community!)
                currCommunityId = null;
            } //TOTEST

            manager.addToSearch(docsToTransfer);
            docsToTransfer.clear();
            System.out.println("(Synced " + nSynced + " records)");
        }
    } // (End loop over docs)

    // Sync remaining docs
    if (!docsToTransfer.isEmpty()) {
        if (bAggregate) {
            // Loop over communities and aggregate each one then store the modified entities/assocs
            doAggregation(communityList, docsToTransfer);
        }
        StoreAndIndexManager manager = new StoreAndIndexManager();
        manager.addToSearch(docsToTransfer);
    }

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }

    if (bAggregate) {
        System.out.println("Completed. You can hit CTRL+C at any time.");
        System.out.println(
                "By default it will keep running for 5 minutes while the background aggregation runs to update the documents' entities.");
        try {
            Thread.sleep(300000);
        } catch (InterruptedException e) {
        }

        // Turn off so we can exit
        EntityBackgroundAggregationManager.stopThreadAndWait();
        AssociationBackgroundAggregationManager.stopThreadAndWait();
    }
}
From source file:com.ikanow.infinit.e.utility.MongoEntityFeatureTxfer.java
License:Apache License
private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) {
    ElasticSearchManager elasticManager = null;

    // Initialize the DB:
    DBCollection entityFeatureDB = DbManager.getFeature().getEntity();

    // Initialize the ES (create the index if it doesn't already):
    // 1. Set-up the entity feature index
    String indexName = "entity_index";
    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // (delete the index)
    //elasticManager = ElasticSearchManager.getIndex(indexName);
    //elasticManager.deleteMe();

    // Create the index if necessary
    String sMapping = new Gson().toJson(new EntityFeaturePojoIndexMap.Mapping(),
            EntityFeaturePojoIndexMap.Mapping.class);
    Builder localSettings = ImmutableSettings.settingsBuilder();
    localSettings.put("number_of_shards", 1).put("number_of_replicas", 0);
    localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
    localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");
    elasticManager = ElasticSearchManager.createIndex(indexName, null, false, null, sMapping, localSettings);

    // Get the index (necessary if already created)
    if (null == elasticManager) {
        elasticManager = ElasticSearchManager.getIndex(indexName);
    }

    // Now query the DB:
    DBCursor dbc = null;
    dbc = entityFeatureDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    int nSynced = 0;
    List<EntityFeaturePojo> entities = new ArrayList<EntityFeaturePojo>();
    while (dbc.hasNext()) {
        EntityFeaturePojo feature = EntityFeaturePojo.fromDb(dbc.next(), EntityFeaturePojo.class);

        if (null != feature.getAlias()) { // (some corrupt gazateer entry)

            // Handle groups (system group is: "4c927585d591d31d7b37097a")
            // if there is no community id, add system group (something is wrong if this happens?)
            if (null == feature.getCommunityId()) {
                feature.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));
            }
        }

        entities.add(feature);
        nSynced++;

        // Add the entities
        if (entities.size() > 1000) {
            elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(entities, EntityFeaturePojo.listType(),
                    new EntityFeaturePojoIndexMap()), "_id", null, true);
            // (note EntityFeaturePojoIndexMap creates an "_id" field of the format index:community)
            entities = new ArrayList<EntityFeaturePojo>();
        }
    }

    // write whatever's left
    elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(entities, EntityFeaturePojo.listType(),
            new EntityFeaturePojoIndexMap()), "_id", null, true);
    // (note EntityFeaturePojoIndexMap creates an "_id" field of the format index:community)

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }
}