List of usage examples for com.mongodb.DBCursor.iterator()
@Override
public Iterator<DBObject> iterator()
Creates a copy of this cursor object that can be iterated.
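Before the individual project examples below, here is a minimal, hedged sketch of the basic pattern they all rely on: obtain a DBCursor from DBCollection.find(), call iterator() to get an Iterator<DBObject> over the results, and walk it with hasNext()/next(). The host, port, database name ("test") and collection name ("docs") are placeholders chosen for illustration, not values taken from the examples.

import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

import java.util.Iterator;

public class DBCursorIteratorExample {
    public static void main(String[] args) throws Exception {
        // Connect to a local mongod (host and port are placeholders)
        MongoClient client = new MongoClient("localhost", 27017);
        try {
            DBCollection coll = client.getDB("test").getCollection("docs");
            // find() returns a DBCursor; iterator() returns a copy of the
            // cursor that can be iterated from the start of the result set
            DBCursor cursor = coll.find();
            try {
                Iterator<DBObject> iter = cursor.iterator();
                while (iter.hasNext()) {
                    DBObject doc = iter.next();
                    System.out.println(doc.get("_id"));
                }
            } finally {
                cursor.close(); // release the server-side cursor
            }
        } finally {
            client.close();
        }
    }
}

Because DBCursor itself implements Iterable<DBObject>, the same loop can also be written as a for-each over the cursor; the explicit iterator() form above is the one used in the examples that follow.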
From source file:bhl.pages.database.MongoConnection.java
License:Open Source License
/**
 * List all the documents in a Mongo collection
 * @param collName the name of the collection
 * @param key the document key to retrieve by
 * @return a String array of document keys
 * @throws DbException
 */
@Override
public String[] listCollectionByKey(String collName, String key) throws DbException {
    try {
        connect();
    } catch (Exception e) {
        throw new DbException(e);
    }
    DBCollection coll = getCollectionFromName(collName);
    BasicDBObject keys = new BasicDBObject();
    keys.put(key, 1);
    DBCursor cursor = coll.find(new BasicDBObject(), keys);
    if (cursor.length() > 0) {
        String[] docs = new String[cursor.length()];
        Iterator<DBObject> iter = cursor.iterator();
        int i = 0;
        while (iter.hasNext()) {
            Object obj = iter.next().get(key);
            docs[i++] = obj.toString();
        }
        return docs;
    } else
        throw new DbException("no docs in collection " + collName);
}
From source file:bhl.pages.database.MongoConnection.java
License:Open Source License
/**
 * Make a subset of the documents by a given subkey and value,
 * then retrieve all the values of the field
 * @param collection the collection to search
 * @param subKey the subKey to make the initial choice
 * @param subValue the value of the subKey to search for
 * @param fields the field names to retrieve
 * @return an array of field values as JSON object strings
 * @throws DbException
 */
@Override
public String[] listCollectionBySubKey(String collection, String subKey, String subValue, String[] fields)
        throws DbException {
    try {
        connect();
        DBCollection coll = getCollectionFromName(collection);
        DBObject query = new BasicDBObject(subKey, subValue);
        BasicDBObject keys = new BasicDBObject();
        for (int i = 0; i < fields.length; i++)
            keys.put(fields[i], 1);
        DBCursor cursor = coll.find(query, keys);
        if (cursor.length() > 0) {
            String[] array = new String[cursor.length()];
            Iterator iter = cursor.iterator();
            int i = 0;
            while (iter.hasNext()) {
                DBObject bson = (DBObject) iter.next();
                JSONObject jobj = new JSONObject();
                for (int j = 0; j < fields.length; j++)
                    jobj.put(fields[j], bson.get(fields[j]));
                array[i++] = jobj.toJSONString();
            }
            return array;
        } else
            return new String[0];
    } catch (Exception e) {
        throw new DbException(e);
    }
}
From source file:calliope.core.database.MongoConnection.java
License:Open Source License
/**
 * Get a list of docIDs or file names corresponding to the regex expr
 * @param collName the collection to query
 * @param expr the regular expression to match against docid
 * @param key the key to retrieve for each matching document
 * @return an array of matching docids, which may be empty
 */
@Override
public String[] listDocuments(String collName, String expr, String key) throws DbException {
    try {
        try {
            connect();
        } catch (Exception e) {
            throw new DbException(e);
        }
        DBCollection coll = getCollectionFromName(collName);
        if (coll != null) {
            BasicDBObject q = new BasicDBObject();
            q.put(JSONKeys.DOCID, Pattern.compile(expr));
            DBCursor curs = coll.find(q);
            ArrayList<String> docids = new ArrayList<String>();
            Iterator<DBObject> iter = curs.iterator();
            int i = 0;
            while (iter.hasNext()) {
                Object kId = iter.next().get(key);
                if (kId != null)
                    docids.add(kId.toString());
            }
            String[] array = new String[docids.size()];
            docids.toArray(array);
            return array;
        } else
            throw new DbException("collection " + collName + " not found");
    } catch (Exception e) {
        throw new DbException(e);
    }
}
From source file:calliope.core.database.MongoConnection.java
License:Open Source License
/**
 * List all the documents in a Mongo collection
 * @param collName the name of the collection
 * @param key the document key to retrieve by
 * @return a String array of document keys
 * @throws DbException
 */
@Override
public String[] listCollectionByKey(String collName, String key) throws DbException {
    try {
        connect();
    } catch (Exception e) {
        throw new DbException(e);
    }
    DBCollection coll = getCollectionFromName(collName);
    BasicDBObject keys = new BasicDBObject();
    keys.put(key, 1);
    DBCursor cursor = coll.find(new BasicDBObject(), keys);
    if (cursor.length() > 0) {
        String[] docs = new String[cursor.length()];
        Iterator<DBObject> iter = cursor.iterator();
        int i = 0;
        while (iter.hasNext()) {
            DBObject dbObj = iter.next();
            Object obj = dbObj.get(key);
            if (key.equals(JSONKeys._ID)) {
                ObjectId id = (ObjectId) dbObj.get(JSONKeys._ID);
                obj = id.toStringMongod();
                docs[i++] = (String) obj;
            } else
                docs[i++] = obj.toString();
        }
        return docs;
    } else
        return new String[0];
}
From source file:calliope.core.database.MongoConnection.java
License:Open Source License
/**
 * List all the documents in a Mongo collection
 * @param collName the name of the collection
 * @return a String array of document keys
 * @throws DbException
 */
@Override
public String[] listCollection(String collName) throws DbException {
    try {
        connect();
    } catch (Exception e) {
        throw new DbException(e);
    }
    DBCollection coll = getCollectionFromName(collName);
    BasicDBObject keys = new BasicDBObject();
    keys.put(JSONKeys.DOCID, 1);
    DBCursor cursor = coll.find(new BasicDBObject(), keys);
    if (cursor.length() > 0) {
        String[] docs = new String[cursor.length()];
        Iterator<DBObject> iter = cursor.iterator();
        int i = 0;
        while (iter.hasNext())
            docs[i++] = (String) iter.next().get(JSONKeys.DOCID);
        return docs;
    } else
        return new String[0];
}
From source file:calliope.db.MongoConnection.java
License:Open Source License
/**
 * Get a list of docIDs or file names corresponding to the regex expr
 * @param collName the collection to query
 * @param expr the regular expression to match against docid
 * @return an array of matching docids, which may be empty
 */
@Override
public String[] listDocuments(String collName, String expr) throws AeseException {
    try {
        connect();
        DBCollection coll = getCollectionFromName(collName);
        if (coll != null) {
            BasicDBObject q = new BasicDBObject();
            q.put(JSONKeys.DOCID, Pattern.compile(expr));
            DBCursor curs = coll.find(q);
            ArrayList<String> docids = new ArrayList<String>();
            Iterator<DBObject> iter = curs.iterator();
            int i = 0;
            while (iter.hasNext()) {
                String dId = (String) iter.next().get(JSONKeys.DOCID);
                if (dId.matches(expr))
                    docids.add(dId);
            }
            String[] array = new String[docids.size()];
            docids.toArray(array);
            return array;
        } else
            throw new AeseException("collection " + collName + " not found");
    } catch (Exception e) {
        throw new AeseException(e);
    }
}
From source file:calliope.db.MongoConnection.java
License:Open Source License
/**
 * List all the documents in a Mongo collection
 * @param collName the name of the collection
 * @return a String array of document keys
 * @throws AeseException
 */
@Override
public String[] listCollection(String collName) throws AeseException {
    if (!collName.equals(Database.CORPIX)) {
        try {
            connect();
        } catch (Exception e) {
            throw new AeseException(e);
        }
        DBCollection coll = getCollectionFromName(collName);
        BasicDBObject keys = new BasicDBObject();
        keys.put(JSONKeys.DOCID, 1);
        DBCursor cursor = coll.find(new BasicDBObject(), keys);
        System.out.println("Found " + cursor.count() + " documents");
        cursor.count();
        if (cursor.length() > 0) {
            String[] docs = new String[cursor.length()];
            Iterator<DBObject> iter = cursor.iterator();
            int i = 0;
            while (iter.hasNext())
                docs[i++] = (String) iter.next().get(JSONKeys.DOCID);
            return docs;
        } else {
            return new String[0];
        }
    } else {
        GridFS gfs = new GridFS(db, collName);
        DBCursor curs = gfs.getFileList();
        int i = 0;
        List<DBObject> list = curs.toArray();
        HashSet<String> set = new HashSet<String>();
        Iterator<DBObject> iter = list.iterator();
        while (iter.hasNext()) {
            String name = (String) iter.next().get("filename");
            set.add(name);
        }
        String[] docs = new String[set.size()];
        set.toArray(docs);
        return docs;
    }
}
From source file:com.bugull.mongo.AdvancedDao.java
License:Apache License
private synchronized Iterable<DBObject> mapReduce(String map, String reduce, String outputTarget,
        MapReduceCommand.OutputType outputType, String orderBy, DBObject query) throws MapReduceException {
    MapReduceOutput output = coll.mapReduce(map, reduce, outputTarget, outputType, query);
    CommandResult cr = output.getCommandResult();
    if (!cr.ok()) {
        throw new MapReduceException(cr.getErrorMessage());
    }
    DBCollection c = output.getOutputCollection();
    DBCursor cursor = null;
    if (orderBy != null) {
        cursor = c.find().sort(MapperUtil.getSort(orderBy));
    } else {
        cursor = c.find();
    }
    List<DBObject> list = new ArrayList<DBObject>();
    for (Iterator<DBObject> it = cursor.iterator(); it.hasNext();) {
        list.add(it.next());
    }
    return list;
}
From source file:com.bugull.mongo.AdvancedDao.java
License:Apache License
private synchronized Iterable<DBObject> mapReduce(String map, String reduce, String outputTarget,
        MapReduceCommand.OutputType outputType, String orderBy, int pageNum, int pageSize, DBObject query)
        throws MapReduceException {
    MapReduceOutput output = coll.mapReduce(map, reduce, outputTarget, outputType, query);
    CommandResult cr = output.getCommandResult();
    if (!cr.ok()) {
        throw new MapReduceException(cr.getErrorMessage());
    }
    DBCollection c = output.getOutputCollection();
    DBCursor cursor = null;
    if (orderBy != null) {
        cursor = c.find().sort(MapperUtil.getSort(orderBy)).skip((pageNum - 1) * pageSize).limit(pageSize);
    } else {
        cursor = c.find().skip((pageNum - 1) * pageSize).limit(pageSize);
    }
    List<DBObject> list = new ArrayList<DBObject>();
    for (Iterator<DBObject> it = cursor.iterator(); it.hasNext();) {
        list.add(it.next());
    }
    return list;
}
From source file:com.ikanow.infinit.e.harvest.extraction.document.DuplicateManager_Integrated.java
License:Open Source License
/**
 * Tests to see if duplicates might exist.
 * If it is not a duplicate, true is returned. If it is a duplicate,
 * the modified date is then checked to see if the file has been updated.
 * True is returned if the file has been updated, false otherwise.
 *
 * @param collection
 * @param modifiedDate
 * @param url
 * @param title
 * @return boolean (true/false)
 */
public boolean needsUpdated_SourceUrl(Date modifiedDate, String sourceUrl, SourcePojo source) {
    // Performance shortcut:
    if (!_bCalculatedMostRecentlyModifiedFile) {
        _bCalculatedMostRecentlyModifiedFile = true;
        // Get date of most recently modified file:
        try {
            if ((null != source.getHarvestStatus())
                    && (HarvestEnum.success == source.getHarvestStatus().getHarvest_status())) {
                BasicDBObject mostRecentQuery = new BasicDBObject(DocumentPojo.sourceKey_,
                        source.getDistributedKeyQueryTerm());
                BasicDBObject mostRecentSort = new BasicDBObject(DocumentPojo._id_, -1);
                BasicDBObject mostRecentFields = new BasicDBObject(DocumentPojo.modified_, 1);
                if (null != source.getDistributionFactor()) { // (need the created date also)
                    mostRecentFields.put(DocumentPojo.created_, 1);
                }
                DBCursor mostRecentDocs = MongoDbManager.getDocument().getMetadata()
                        .find(mostRecentQuery, mostRecentFields).sort(mostRecentSort).limit(1);
                if (mostRecentDocs.hasNext()) {
                    BasicDBObject mostRecentDocDbo = (BasicDBObject) mostRecentDocs.next();
                    _mostRecentlyModifiedFile = (Date) mostRecentDocDbo.get(DocumentPojo.modified_);
                    _mostRecentlyModifiedDocId = (ObjectId) mostRecentDocDbo.get(DocumentPojo._id_);
                    if (null != source.getDistributionFactor()) {
                        // This is a slightly more complex case because other threads for this source
                        // could be writing documents asynchronously ... so we're just going to disable
                        // everything if the most recent doc is _after_ our last harvest time (since this
                        // means we've already started harvesting the new source)
                        Date mostRecentlyModifedFile_createdTime = (Date) mostRecentDocDbo
                                .get(DocumentPojo.created_);
                        if ((null != source.getHarvestStatus())
                                && (null != source.getHarvestStatus().getHarvested()
                                        && (null != mostRecentlyModifedFile_createdTime))) {
                            if (mostRecentlyModifedFile_createdTime
                                    .after(source.getHarvestStatus().getHarvested())) {
                                _mostRecentlyModifiedFile = null;
                                _mostRecentlyModifiedDocId = null;
                            }
                        } else {
                            // If we don't have a date then force a "slow" dedup
                            _mostRecentlyModifiedFile = null;
                            _mostRecentlyModifiedDocId = null;
                        }
                    } //TESTED
                } //(found docs)
            } //(success mode)
        } catch (Exception e) {
        } // If anything goes wrong will just check all files (slower)
    } //TESTED

    if (null != _mostRecentlyModifiedFile) { // Use short cut...
        long nMostRecentlyModifiedTime = _mostRecentlyModifiedFile.getTime() / 1000L;
        long nFileTime = modifiedDate.getTime() / 1000L;
        if (nFileTime <= nMostRecentlyModifiedTime) {
            return false;
        }
    } //TESTED
    else if (null == sourceUrl) {
        return true; // (for custom checking - if we couldn't get a cached value to compare against
                     //  then assume we are inspecting)
    }

    // No short cut, go the long way round:
    DBCollection collection = DbManager.getDocument().getMetadata();
    boolean ret = true;
    BasicDBObject query = new BasicDBObject();
    query.put(DocumentPojo.sourceUrl_, sourceUrl);
    query.put(DocumentPojo.sourceKey_, source.getDistributedKeyQueryTerm());
    BasicDBObject hint = new BasicDBObject(DocumentPojo.sourceUrl_, 2);
    BasicDBObject fields = new BasicDBObject(DocumentPojo.modified_, 1);
    DBCursor dbc = collection.find(query, fields).hint(hint).limit(1);
    // (this should be very fast since sourceUrl is indexed ... order doesn't matter as all docs
    //  should have the same modified)
    //TODO (INF-1922): at some point should look into making (sparse) sourceUrl be compounded with
    // sourceKey - this is a bit risky
    if (!dbc.hasNext()) { // if there is no record, return true
        ret = true;
        modifiedDate.setTime(0);
    } else { // (all docs should have same modified, though this is ~ time ordered anyway)
        BasicDBObject dbo = (BasicDBObject) dbc.iterator().next();
        Date oldModified = (Date) dbo.get(DocumentPojo.modified_);
        ret = ((modifiedDate.getTime() / 1000) != (oldModified.getTime() / 1000));
        // ie if different -> true -> update docs from sourceUrl
        // ^^ note granularity seems only to be guaranteed to 1s somewhere in the system (not sure where)
        // (this is just backwards compatible for a deployment where this has happened for some %
        //  - probably 100 - of the docs; once an RPM >= 5955 is deployed this will no longer be necessary)
    }
    return ret;
}