Example usage for com.mongodb DBCursor skip

List of usage examples for com.mongodb DBCursor skip

Introduction

On this page you can find example usage for com.mongodb DBCursor skip.

Prototype

public DBCursor skip(final int numberOfElements) 

Document

Discards a given number of elements at the beginning of the cursor.
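
Before the full examples, here is a minimal, self-contained sketch of the skip/limit pagination idiom that most of the snippets below rely on. The host, database name ("testdb"), collection name ("events"), and query fields are placeholders for illustration, not taken from any of the source files.

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

public class DBCursorSkipExample {

    public static void main(String[] args) {
        // Placeholder connection details for illustration only.
        MongoClient mongo = new MongoClient("localhost", 27017);
        try {
            DB db = mongo.getDB("testdb");
            DBCollection col = db.getCollection("events");

            int page = 3;      // 1-based page number
            int pageSize = 20; // documents per page

            // skip() discards the first N results of the cursor; together with
            // sort() and limit() this gives simple offset-based pagination.
            DBCursor cursor = col.find(new BasicDBObject("type", "login"))
                    .sort(new BasicDBObject("_id", 1))
                    .skip((page - 1) * pageSize)
                    .limit(pageSize);
            try {
                while (cursor.hasNext()) {
                    DBObject doc = cursor.next();
                    System.out.println(doc);
                }
            } finally {
                cursor.close();
            }
        } finally {
            mongo.close();
        }
    }
}

Because skip still walks over the discarded documents on the server, large skip values become progressively slower; for deep pagination a range query on an indexed field is generally preferred.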

Usage

From source file: com.cyslab.craftvm.rest.mongo.QueryServlet.java

License: GNU General Public License

@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {

    log.trace("doPost()");

    // server auth
    if (!can_read(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    String ret = null;

    InputStream is = req.getInputStream();
    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }
    String skip = req.getParameter("skip");
    String limit = req.getParameter("limit");

    String _fields = req.getParameter("fields");
    String fields[] = null;
    if (_fields != null)
        fields = _fields.split("[,]");

    DB db = mongo.getDB(db_name);

    // mongo auth
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);

    StringBuilder buf = tl.get();
    // reset buf
    buf.setLength(0);

    BufferedReader r = null;
    DBObject q = null, sort = null;

    try {

        r = new BufferedReader(new InputStreamReader(is));
        String data = r.readLine();
        if (data == null) {
            error(res, SC_BAD_REQUEST, Status.get("no data"));
            return;
        }
        try {
            q = (DBObject) JSON.parse(data);
            if (cache != null) {
                buf.append(db_name);
                buf.append(col_name);
                buf.append(data);
            }
        } catch (JSONParseException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
            return;
        }
        // sort param
        data = r.readLine();
        if (data != null) {
            try {
                sort = (DBObject) JSON.parse(data);
            } catch (JSONParseException e) {
                error(res, SC_BAD_REQUEST, Status.get("can not parse sort arg"));
                return;
            }
            if (cache != null)
                buf.append(data);
        }

    } finally {
        if (r != null)
            r.close();
    }

    // limit
    int lim;
    if (limit != null) {
        try {
            lim = Math.min(Integer.parseInt(limit), MAX_FIELDS_TO_RETURN);
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse limit"));
            return;
        }
    } else {
        lim = MAX_FIELDS_TO_RETURN;
    }
    if (cache != null) {
        buf.append(";limit=");
        buf.append(lim);
    }

    // skip
    int sk = -1;
    if (skip != null) {
        try {
            sk = Integer.parseInt(skip);
            if (cache != null) {
                buf.append(";skip=");
                buf.append(sk);
            }
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse skip"));
            return;
        }
    }

    // fields
    if (cache != null && _fields != null) {
        buf.append(";fields=");
        buf.append(_fields);
    }

    // from cache
    String cache_key = null;
    if (cache != null) {
        cache_key = buf.toString();
        try {
            ret = (String) cache.get(cache_key);
        }
        // some wrong char in key
        catch (IllegalArgumentException e) {
            int l = buf.length();
            for (int i = 0; i < l; i++) {
                if (buf.charAt(i) == ' ')
                    buf.setCharAt(i, '*');
            }
            cache_key = buf.toString();
            ret = (String) cache.get(cache_key);
        }
        if (ret != null) {
            out_str(req, ret, "application/json");
            return;
        }
    }

    // cursor
    DBCursor c;
    if (fields != null) {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        int len = fields.length;
        for (int i = 0; i < len; i++) {
            String s = fields[i];
            sb.append('"');
            sb.append(s);
            sb.append('"');
            sb.append(":1");
            if (i != (len - 1))
                sb.append(",");
        }
        sb.append("}");
        c = col.find(q, (DBObject) JSON.parse(sb.toString()));
    } else
        c = col.find(q);

    if (c == null || c.count() == 0) {
        error(res, SC_NOT_FOUND, Status.get("no documents found"));
        return;
    }

    if (sort != null)
        c.sort(sort);

    res.setIntHeader("X-Documents-Count", c.count());

    c.limit(lim);
    if (sk != -1)
        c.skip(sk);

    // reset buf
    buf.setLength(0);

    int no = 0;
    buf.append("[");
    while (c.hasNext()) {

        DBObject o = c.next();
        if (rm_id)
            o.removeField("_id");
        JSON.serialize(o, buf);
        buf.append(",");
        no++;

    }
    c.close();

    if (no > 0)
        buf.setCharAt(buf.length() - 1, ']');
    else
        buf.append(']');

    res.setIntHeader("X-Documents-Returned", no);

    ret = buf.toString();
    if (cache != null)
        cache.set(cache_key, ret);

    out_str(req, ret, "application/json");

}

From source file: com.cyslab.craftvm.rest.mongo.QueryServlet.java

License: GNU General Public License

@Override
protected void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {

    log.trace("doGet()");

    if (!can_read(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }
    String skip = req.getParameter("skip");
    String limit = req.getParameter("limit");

    DB db = mongo.getDB(db_name);

    // mongo auth
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);

    DBCursor c = col.find();
    if (c == null || c.count() == 0) {
        error(res, SC_NOT_FOUND, Status.get("no documents found"));
        return;
    }

    res.setIntHeader("X-Documents-Count", c.count());

    if (limit != null) {
        try {
            c.limit(Math.min(Integer.parseInt(limit), MAX_FIELDS_TO_RETURN));
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse limit"));
            c.close();
            return;
        }
    } else
        c.limit(MAX_FIELDS_TO_RETURN);

    if (skip != null) {
        try {
            c.skip(Integer.parseInt(skip));
        } catch (NumberFormatException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse skip"));
            c.close();
            return;
        }
    }

    StringBuilder buf = tl.get();
    buf.setLength(0);

    int no = 0;
    buf.append("[");
    while (c.hasNext()) {

        DBObject o = c.next();
        if (rm_id)
            o.removeField("_id");
        JSON.serialize(o, buf);
        buf.append(",");
        no++;

    }
    c.close();

    if (no > 0)
        buf.setCharAt(buf.length() - 1, ']');
    else
        buf.append(']');

    res.setIntHeader("X-Documents-Returned", no);

    out_str(req, buf.toString(), "application/json");

}

From source file: com.ebay.cloud.cms.dal.persistence.MongoExecutor.java

License: Apache License

public static List<DBObject> find(PersistenceContext context, MetaClass metadata, DBObject queryObject,
        DBObject fieldObject, SearchOption option) {
    long start = System.currentTimeMillis();
    String msg = "success";
    DBCollection dbCollection = context.getDBCollection(metadata);
    DBCursor findCursor = null;
    Integer size = 0;
    try {
        findCursor = dbCollection.find(queryObject, fieldObject);
        // set option
        if (option.hasLimit()) {
            findCursor.limit(option.getLimit());
        }
        if (option.hasSkip()) {
            findCursor.skip(option.getSkip());
        }
        if (option.hasSort()) {
            findCursor.sort(option.getSort());
        }
        // populate search result
        List<DBObject> result = findCursor.toArray();
        size = result.size();
        return result;
    } catch (Throwable t) {
        msg = t.getMessage();
        handleMongoException(t);
    } finally {
        if (findCursor != null) {
            findCursor.close();
        }
        logMongoAction(context, "find", start, dbCollection, queryObject, fieldObject, option, size, msg);
    }
    return Collections.emptyList();
}

From source file: com.edgytech.umongo.CollectionPanel.java

License: Apache License

private void exportToFile(final DBCollection col, final DBObject query, final DBObject fields,
        final DBObject sort, final int skip, final int limit, final int batchSize) {
    ExportDialog dia = UMongo.instance.getGlobalStore().getExportDialog();
    if (!dia.show()) {
        return;
    }
    final DocumentSerializer ds = dia.getDocumentSerializer();
    final boolean continueOnError = dia.getBooleanFieldValue(ExportDialog.Item.continueOnError);
    new DbJob() {
        @Override
        public Object doRun() throws Exception {
            try {
                try {
                    DBCursor cur = col.find(query, fields);
                    if (skip > 0) {
                        cur.skip(skip);
                    }
                    if (batchSize != 0) {
                        cur.batchSize(batchSize);
                    }
                    if (sort != null) {
                        cur.sort(sort);
                    }
                    if (limit > 0) {
                        cur.limit(limit);
                    }
                    while (cur.hasNext() && !stopped) {
                        ds.writeObject(cur.next());
                    }
                } catch (Exception e) {
                    if (continueOnError) {
                        getLogger().log(Level.WARNING, null, e);
                    } else {
                        throw e;
                    }
                }
            } finally {
                ds.close();
            }
            return null;
        }

        @Override
        public String getNS() {
            return col.getFullName();
        }

        @Override
        public String getShortName() {
            return "Export";
        }
    }.addJob();
}

From source file: com.effektif.mongo.Query.java

License: Apache License

public void applyCursorConfigs(DBCursor dbCursor) {
    if (skip != null || limit != null) {
        log.debug("Applying page to cursor: skip=" + skip + ", limit=" + limit);
        if (skip != null) {
            dbCursor.skip(skip);
        }
        if (limit != null) {
            dbCursor.limit(limit);
        }
    }
    if (orderBy != null) {
        log.debug("Applying sort to cursor: " + orderBy);
        dbCursor.sort(orderBy);
    }
}

From source file: com.google.api.ads.adwords.jaxws.extensions.report.model.persistence.mongodb.MongoEntityPersister.java

License: Open Source License

/**
 * @see com.google.api.ads.adwords.jaxws.extensions.report.model.persistence.EntityPersister
 *      #get(java.lang.Class, java.util.Map, int, int)
 */
@Override
public <T, V> List<T> get(Class<T> t, Map<String, V> keyValueList, int numToSkip, int limit) {
    DBCollection dbcollection = getDBCollection(t, true);

    BasicDBObject query = new BasicDBObject();
    if (keyValueList != null) {
        for (String key : keyValueList.keySet()) {
            query.put(key, keyValueList.get(key));
        }
    }
    DBCursor cur = dbcollection.find(query);

    if (limit > 0) {
        cur.limit(limit);
    }
    if (numToSkip > 0) {
        cur.skip(numToSkip);
    }

    List<T> list = new ArrayList<T>();
    while (cur.hasNext()) {
        DBObject dbObject = cur.next();
        list.add(gson.fromJson(com.mongodb.util.JSON.serialize(dbObject), t));
    }
    return list;
}

From source file: com.icoin.trading.webui.init.SystemController.java

License: Apache License

@RequestMapping(value = "/collection/{id}", method = RequestMethod.GET)
public String collection(@PathVariable("id") String collectionName,
        @RequestParam(value = "page", defaultValue = "1") int pageNumber,
        @RequestParam(value = "itemsperpage", defaultValue = "5") int itemsPerPage, Model model) {
    DBCursor dbCursor = springTemplate.getCollection(collectionName).find();
    List<DBObject> dbObjects = dbCursor.skip((pageNumber - 1) * itemsPerPage).limit(itemsPerPage).toArray();

    List<Map> items = new ArrayList<Map>(dbCursor.length());
    for (DBObject dbObject : dbObjects) {
        items.add(dbObject.toMap());
    }
    model.addAttribute("items", items);

    int totalItems = dbCursor.count();
    int numPages = ((int) Math.floor(totalItems / itemsPerPage)) + 1;
    model.addAttribute("numPages", numPages);
    model.addAttribute("page", pageNumber);
    model.addAttribute("itemsPerPage", itemsPerPage);
    model.addAttribute("collectionName", collectionName);
    return "data/collection";
}

From source file: com.ikanow.infinit.e.utility.MongoAssociationFeatureTxfer.java

License: Apache License

private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) {
    ElasticSearchManager elasticManager = null;

    // Initialize the DB:
    DBCollection eventFeatureDB = DbManager.getFeature().getAssociation();

    // Initialize the ES (create the index if it doesn't already):

    // 1. Set-up the entity feature index 

    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // (delete the index)
    //elasticManager = ElasticSearchManager.getIndex("association_index");
    //elasticManager.deleteMe();

    // Create the index if necessary
    String sMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(),
            AssociationFeaturePojoIndexMap.Mapping.class);
    Builder localSettings = ImmutableSettings.settingsBuilder();
    localSettings.put("number_of_shards", 1).put("number_of_replicas", 0);
    localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
    localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");

    elasticManager = ElasticSearchManager.createIndex("association_index", null, false, null, sMapping,
            localSettings);

    // Get the index (necessary if already created)
    if (null == elasticManager) {
        elasticManager = ElasticSearchManager.getIndex("association_index");
    }

    // Now query the DB:

    DBCursor dbc = null;
    dbc = eventFeatureDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    List<AssociationFeaturePojo> events = new LinkedList<AssociationFeaturePojo>();

    int nSynced = 0;

    // Loop over array and invoke the cleansing function for each one
    while (dbc.hasNext()) {
        BasicDBObject dbo = (BasicDBObject) dbc.next();
        AssociationFeaturePojo evt = AssociationFeaturePojo.fromDb(dbo, AssociationFeaturePojo.class);

        // If this table has just been rebuilt from the document then the indexes are all wrong ...
        // recalculate and save
        if ('#' == evt.getIndex().charAt(0)) {
            AssociationPojo singleEvt = new AssociationPojo();
            singleEvt.setEntity1_index(evt.getEntity1_index());
            singleEvt.setEntity2_index(evt.getEntity2_index());
            singleEvt.setVerb_category(evt.getVerb_category());
            singleEvt.setGeo_index(evt.getGeo_index());
            evt.setIndex(AssociationAggregationUtils.getEventFeatureIndex(singleEvt));
            eventFeatureDB
                    .update(new BasicDBObject("_id", dbo.get("_id")),
                            new BasicDBObject(MongoDbManager.set_,
                                    new BasicDBObject(AssociationFeaturePojo.index_, evt.getIndex())),
                            false, true);
            // (has to be a multi-update even though it's unique because it's sharded on index)
        }

        // Handle groups (system group is: "4c927585d591d31d7b37097a")
        if (null == evt.getCommunityId()) {
            evt.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));
        }
        // Bulk add prep
        events.add(evt);
        nSynced++;

        if (events.size() > 1000) {
            elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events,
                    AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null,
                    true);
            events.clear();
        }
    }
    // End loop over entities

    // write whatever's left
    elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(),
            new AssociationFeaturePojoIndexMap()), "_id", null, true);

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }
}

From source file: com.ikanow.infinit.e.utility.MongoDocumentTxfer.java

License: Apache License

private void doTransfer(BasicDBObject query, int nSkip, int nLimit, boolean bAggregate, BasicDBObject chunk)
        throws IOException {
    PropertiesManager pm = new PropertiesManager();
    int nMaxContentSize_bytes = pm.getMaxContentSize();

    // Initialize the DB:

    DBCollection docsDB = DbManager.getDocument().getMetadata();
    DBCollection contentDB = DbManager.getDocument().getContent();
    DBCollection sourcesDB = DbManager.getIngest().getSource();

    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // 1. Get the documents from the DB (combining data + metadata and refreshing source meta)

    // (Ignore soft-deleted records:)
    if (null == query) {
        query = new BasicDBObject();
    }
    Object sourceKeyQueryTerm = query.remove(DocumentPojo.sourceKey_);
    if (null != sourceKeyQueryTerm) {
        if (query.toString()
                .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException(
                    "Can't specify sourceKey as part of complex query term: " + query.toString());
        } //TESTED (by hand, "{ \"sourceKey\": \"x\", \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        if (sourceKeyQueryTerm instanceof String) {
            query.put(DocumentPojo.sourceKey_,
                    SourcePojo.getDistributedKeyQueryTerm((String) sourceKeyQueryTerm));
        } //TESTED (by hand, "{\"sourceKey\": \"feeds.arstechnica.com.arstechnica.index.11.2.\" }")
        else if (sourceKeyQueryTerm instanceof DBObject) { // find all the _sources_ matching this term, and convert to a big list including distribution
            BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
            fields.put(SourcePojo.highestDistributionFactorStored_, 1);
            DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.key_, sourceKeyQueryTerm), fields);
            LinkedList<String> sourceKeys = new LinkedList<String>();
            for (DBObject dbo : dbc) {
                String key = (String) dbo.get(SourcePojo.key_);
                Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_);
                Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key, distributionFactor);
                sourceKeys.addAll(sourceKeysForSource);
            }
            query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));
        } //TESTED (by hand, "{\"sourceKey\": { \"$gt\": \"dev.ikanow\" } }")
        else {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //(actually not possible, just included here for mathematical completeness...)         
    } else {
        if (query.toString()
                .contains(new StringBuffer('"').append(DocumentPojo.sourceKey_).append('"').toString())) {
            throw new RuntimeException("Can't specify sourceKey as part of complex query term");
        } //TESTED (by hand, "{ \"$or\": [ { \"sourceKey\": \"x\" } ] }")

        // Optimize communityId into sourceKeys...
        if (null != query.get(DocumentPojo.communityId_)) {
            try {
                ObjectId commId = query.getObjectId(DocumentPojo.communityId_);
                BasicDBObject fields = new BasicDBObject(SourcePojo.key_, 1);
                fields.put(SourcePojo.highestDistributionFactorStored_, 1);
                DBCursor dbc = sourcesDB.find(new BasicDBObject(SourcePojo.communityIds_, commId), fields);
                LinkedList<String> sourceKeys = new LinkedList<String>();
                int added = 0;
                for (DBObject dbo : dbc) {
                    String key = (String) dbo.get(SourcePojo.key_);
                    Integer distributionFactor = (Integer) dbo.get(SourcePojo.highestDistributionFactorStored_);
                    Collection<String> sourceKeysForSource = SourcePojo.getDistributedKeys(key,
                            distributionFactor);
                    sourceKeys.addAll(sourceKeysForSource);
                    added += sourceKeysForSource.size();
                }
                query.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, sourceKeys));

                System.out.println("(Optimized simple community query to " + added + " source key(s))");
            } catch (Exception e) {
                //DEBUG
                //e.printStackTrace();

                System.out.println("(Can't optimize complex community query: " + e.getMessage());
            }
        } //TESTED (by hand - including distributed source version)
    }
    // Ignore deleted objects
    Object urlQuery = query.get(DocumentPojo.url_);
    if (null == urlQuery) {
        query.put(DocumentPojo.url_, Pattern.compile("^[^?]")); // (ie nothing starting with ?)
    } //TESTED
    else if (urlQuery instanceof BasicDBObject) {
        ((BasicDBObject) urlQuery).append("$regex", "^[^?]");
    } //TESTED
      //DEBUG
      //System.out.println("COMBINED QUERY= " + query.toString());

    // If aggregating, kick off the background aggregation thread
    if (bAggregate) {
        EntityBackgroundAggregationManager.startThread();
        AssociationBackgroundAggregationManager.startThread();
    }

    //Debug:
    DBCursor dbc = null;
    dbc = docsDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    byte[] storageArray = new byte[200000];

    int nSynced = 0;
    LinkedList<DocumentPojo> docsToTransfer = new LinkedList<DocumentPojo>();
    Map<ObjectId, LinkedList<DocumentPojo>> communityList = null;
    ObjectId currCommunityId = null;
    while (dbc.hasNext()) {
        BasicDBObject dbo = (BasicDBObject) dbc.next();
        DocumentPojo doc = DocumentPojo.fromDb(dbo, DocumentPojo.class);
        String sDocIndex = doc.getIndex();
        if (null == sDocIndex) {
            sDocIndex = "document_index";
        }
        if ((null != _deletedIndex) && !_deletedIndex.contains(sDocIndex)) {
            _deletedIndex.add(sDocIndex);
            rebuildIndex(sDocIndex);
            try { // (Just in case the index requires some time to sort itself out)
                Thread.sleep(1000);
            } catch (InterruptedException e) {
            }
        }

        //Debug:
        //System.out.println("Getting content..." + feed.getTitle() + " / " + feed.getUrl());

        // Get the content:
        if ((0 != nMaxContentSize_bytes)
                && StoreAndIndexManager.docHasExternalContent(doc.getUrl(), doc.getSourceUrl())) {
            BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, doc.getUrl());
            contentQ.put(CompressedFullTextPojo.sourceKey_,
                    new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, doc.getSourceKey())));
            BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1);
            fields.put(CompressedFullTextPojo.sourceKey_, 1);

            DBCursor dbcGzip = contentDB.find(contentQ, fields);
            while (dbcGzip.hasNext()) {
                BasicDBObject dboContent = (BasicDBObject) dbcGzip.next();
                if (!dboContent.containsField(CompressedFullTextPojo.sourceKey_)) {
                    // If this has another version then ignore this one...
                    if (dbc.hasNext()) {
                        continue;
                    } //TESTED (by hand)               
                }

                byte[] compressedData = ((byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_));
                ByteArrayInputStream in = new ByteArrayInputStream(compressedData);
                GZIPInputStream gzip = new GZIPInputStream(in);
                int nRead = 0;
                StringBuffer output = new StringBuffer();
                while (nRead >= 0) {
                    nRead = gzip.read(storageArray, 0, 200000);
                    if (nRead > 0) {
                        String s = new String(storageArray, 0, nRead, "UTF-8");
                        output.append(s);
                    }
                }
                doc.setFullText(output.toString());
            }
        }
        // (else document has full text already)

        // Get tags, if necessary:
        // Always overwrite tags - one of the reasons we might choose to migrate
        // Also may need source in order to support source index filtering
        SourcePojo src = _sourceCache.get(doc.getSourceKey());
        if (null == src) {
            //TODO (INF-2265): handle search index settings in pipeline mode... (also didn't seem to work?)
            BasicDBObject srcDbo = (BasicDBObject) sourcesDB
                    .findOne(new BasicDBObject(SourcePojo.key_, doc.getSourceKey()));
            if (null != srcDbo) {
                src = SourcePojo.fromDb(srcDbo, SourcePojo.class);

                if (null != src.getProcessingPipeline()) {
                    try {
                        // Set the index settings
                        HarvestController hc = new HarvestController();
                        HarvestControllerPipeline hcPipe = new HarvestControllerPipeline();
                        hcPipe.extractSource_preProcessingPipeline(src, hc);
                    } catch (Exception e) {
                        //DEBUG
                        e.printStackTrace();
                    }
                } //TESTED (by hand)

                _sourceCache.put(doc.getSourceKey(), src);
            }
        }
        doc.setTempSource(src); // (needed for source index filtering)
        if (null != src) {
            if (null != src.getTags()) {
                Set<String> tagsTidied = new TreeSet<String>();
                for (String s : src.getTags()) {
                    String ss = s.trim().toLowerCase();
                    tagsTidied.add(ss);
                }

                // May also want to write this back to the DB:
                //TODO (INF-2223): Handle append tags or not in the pipeline...
                if ((null == src.getAppendTagsToDocs()) || src.getAppendTagsToDocs()) {
                    if ((null == doc.getTags()) || (doc.getTags().size() < tagsTidied.size())) {
                        BasicDBObject updateQuery = new BasicDBObject(DocumentPojo.sourceKey_,
                                doc.getRawSourceKey()); // (ie including the # if there is one)
                        updateQuery.put(DocumentPojo._id_, doc.getId());
                        docsDB.update(updateQuery,
                                new BasicDBObject(DbManager.addToSet_, new BasicDBObject(DocumentPojo.tags_,
                                        new BasicDBObject(DbManager.each_, tagsTidied))));
                    }
                    doc.setTags(tagsTidied); // (just copy ptr across)
                }
            }
        }

        // 2. Update the index with the new document            

        // (Optionally also update entity and assoc features)

        if (bAggregate) {
            if (null == currCommunityId) {
                currCommunityId = doc.getCommunityId();
            } else if (!currCommunityId.equals(doc.getCommunityId())) {
                LinkedList<DocumentPojo> perCommunityDocList = null;
                if (null == communityList) { // (very first time we see > 1 community)
                    communityList = new TreeMap<ObjectId, LinkedList<DocumentPojo>>();
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    perCommunityDocList.addAll(docsToTransfer); //(NOT including doc, this hasn't been added to docsToTransfer yet)
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                currCommunityId = doc.getCommunityId();
                perCommunityDocList = communityList.get(currCommunityId);
                if (null == perCommunityDocList) {
                    perCommunityDocList = new LinkedList<DocumentPojo>();
                    communityList.put(currCommunityId, perCommunityDocList);
                }
                perCommunityDocList.add(doc);
            }
        } //TESTED

        nSynced++;
        docsToTransfer.add(doc);
        if (0 == (nSynced % 10000)) {
            StoreAndIndexManager manager = new StoreAndIndexManager();

            if (bAggregate) {
                // Loop over communities and aggregate each one then store the modified entities/assocs               
                doAggregation(communityList, docsToTransfer);
                communityList = null; // (in case the next 10,000 docs are all in the same community!)
                currCommunityId = null;

            } //TOTEST            

            manager.addToSearch(docsToTransfer);
            docsToTransfer.clear();
            System.out.println("(Synced " + nSynced + " records)");
        }

    } // (End loop over docs)

    // Sync remaining docs

    if (!docsToTransfer.isEmpty()) {
        if (bAggregate) {
            // Loop over communities and aggregate each one then store the modified entities/assocs               
            doAggregation(communityList, docsToTransfer);
        }

        StoreAndIndexManager manager = new StoreAndIndexManager();
        manager.addToSearch(docsToTransfer);
    }

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }

    if (bAggregate) {
        System.out.println("Completed. You can hit CTRL+C at any time.");
        System.out.println(
                "By default it will keep running for 5 minutes while the background aggregation runs to update the documents' entities.");
        try {
            Thread.sleep(300000);
        } catch (InterruptedException e) {
        }

        // Turn off so we can exit
        EntityBackgroundAggregationManager.stopThreadAndWait();
        AssociationBackgroundAggregationManager.stopThreadAndWait();
    }
}

From source file: com.ikanow.infinit.e.utility.MongoEntityFeatureTxfer.java

License: Apache License

private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) {
    ElasticSearchManager elasticManager = null;

    // Initialize the DB:
    DBCollection entityFeatureDB = DbManager.getFeature().getEntity();

    // Initialize the ES (create the index if it doesn't already):

    // 1. Set-up the entity feature index 

    String indexName = "entity_index";
    ElasticSearchManager.setDefaultClusterName("infinite-aws");

    // (delete the index)
    //elasticManager = ElasticSearchManager.getIndex(indexName);
    //elasticManager.deleteMe();

    // Create the index if necessary
    String sMapping = new Gson().toJson(new EntityFeaturePojoIndexMap.Mapping(),
            EntityFeaturePojoIndexMap.Mapping.class);
    Builder localSettings = ImmutableSettings.settingsBuilder();
    localSettings.put("number_of_shards", 1).put("number_of_replicas", 0);
    localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
    localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");

    elasticManager = ElasticSearchManager.createIndex(indexName, null, false, null, sMapping, localSettings);

    // Get the index (necessary if already created)
    if (null == elasticManager) {
        elasticManager = ElasticSearchManager.getIndex(indexName);
    }

    // Now query the DB:

    DBCursor dbc = null;
    dbc = entityFeatureDB.find(query);
    if (null != chunk) {
        if (chunk.containsField(DbManager.min_)) {
            dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
        }
        if (chunk.containsField(DbManager.max_)) {
            dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
        }
    }
    dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
    if (null == chunk) {
        int nCount = dbc.count() - nSkip;
        if (nCount < 0)
            nCount = 0;
        System.out.println(
                "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit));
        if (0 == nCount) { // Nothing to do...
            return;
        }
    }

    int nSynced = 0;

    List<EntityFeaturePojo> entities = new ArrayList<EntityFeaturePojo>();
    while (dbc.hasNext()) {
        EntityFeaturePojo feature = EntityFeaturePojo.fromDb(dbc.next(), EntityFeaturePojo.class);

        if (null != feature.getAlias()) { // (some corrupt gazetteer entry)

            // Handle groups (system group is: "4c927585d591d31d7b37097a")
            // if there is no community id, add system group (something is wrong if this happens?)
            if (null == feature.getCommunityId()) {
                feature.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));
            }
        }

        entities.add(feature);
        nSynced++;

        // Add the entities
        if (entities.size() > 1000) {
            elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(entities, EntityFeaturePojo.listType(),
                    new EntityFeaturePojoIndexMap()), "_id", null, true);
            // (note EntityFeaturePojoIndexMap creates an "_id" field of the format index:community)

            entities = new ArrayList<EntityFeaturePojo>();
        }
    }
    // write whatever's left
    elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(entities, EntityFeaturePojo.listType(),
            new EntityFeaturePojoIndexMap()), "_id", null, true);
    // (note EntityFeaturePojoIndexMap creates an "_id" field of the format index:community)

    if (null != chunk) {
        System.out.println("Found " + nSynced + " records to sync in chunk");
    }
}