Example usage for com.mongodb BasicDBObject getString

List of usage examples for com.mongodb BasicDBObject getString

Introduction

On this page you can find example usage for com.mongodb BasicDBObject getString.

Prototype

public String getString(final String key, final String def) 

Document

Returns the value of a field as a string, or the supplied default if the field is absent.
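
A minimal sketch of this fallback behavior follows; the class and field names are purely illustrative and assume only that the MongoDB Java driver (which provides com.mongodb.BasicDBObject) is on the classpath.

import com.mongodb.BasicDBObject;

public class GetStringDemo {
    public static void main(String[] args) {
        // Illustrative document with a single string field set
        BasicDBObject doc = new BasicDBObject("firstName", "Ada");

        // Field present: the stored value is returned ("Ada")
        String firstName = doc.getString("firstName", "");

        // Field missing: the supplied default is returned instead of null ("unknown")
        String lastName = doc.getString("lastName", "unknown");

        System.out.println(firstName + " / " + lastName);
    }
}

The usage examples below rely on the same fallback, typically passing an empty string or null as the default.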

Usage

From source file: com.epam.dlab.auth.dao.UserInfoDAOMongoImpl.java

License: Apache License

@Override
public UserInfo getUserInfoByAccessToken(String accessToken) {
    BasicDBObject uiSearchDoc = new BasicDBObject();
    uiSearchDoc.put("_id", accessToken);
    MongoCollection<BasicDBObject> mc = ms.getCollection("security", BasicDBObject.class);
    FindIterable<BasicDBObject> res = mc.find(uiSearchDoc);
    BasicDBObject uiDoc = res.first();
    if (uiDoc == null) {
        log.warn("UI not found {}", accessToken);
        return null;
    }
    Date lastAccess = uiDoc.getDate("expireAt");
    if (inactiveUserTimeoutMsec < Math.abs(new Date().getTime() - lastAccess.getTime())) {
        log.warn("UI for {} expired but were not evicted from DB. Contact MongoDB admin to create expireable "
                + "index" + " on 'expireAt' key.", accessToken);
        this.deleteUserInfo(accessToken);
        return null;
    }
    String name = uiDoc.get("name").toString();
    String firstName = uiDoc.getString("firstName", "");
    String lastName = uiDoc.getString("lastName", "");
    String remoteIp = uiDoc.getString("remoteIp", "");
    BasicDBList roles = (BasicDBList) uiDoc.get("roles");
    Boolean awsUser = uiDoc.getBoolean("awsUser", false);
    UserInfo ui = new UserInfo(name, accessToken);
    ui.setFirstName(firstName);
    ui.setLastName(lastName);
    ui.setRemoteIp(remoteIp);
    ui.setAwsUser(awsUser);
    Object awsKeys = uiDoc.get("awsKeys");
    if (awsKeys != null) {
        ((BasicDBObject) awsKeys).forEach((key, val) -> ui.addKey(key, val.toString()));
    }
    roles.forEach(o -> ui.addRole("" + o));
    log.debug("Found persistent {}", ui);
    return ui;
}

From source file: com.ikanow.aleph2.v1.document_db.utils.LegacyV1HadoopUtils.java

License: Open Source License

/** parse the V1 query string 
 * @param query
 * @return the required objects embedded in various tuples
 */
public static Tuple4<String, Tuple2<Integer, Integer>, BasicDBObject, DBObject> parseQueryObject(
        final String query, final List<String> community_ids) {
    // Some fixed variables just to avoid changing the guts of the (tested in v1) code
    final boolean isCustomTable = false;
    @SuppressWarnings("unused")
    Integer nDebugLimit = null;
    final boolean bLocalMode = false;
    @SuppressWarnings("unused")
    final Boolean incrementalMode = null;
    final String input = "doc_metadata.metadata";

    // Output objects
    final String out_query;
    int nSplits = 8;
    int nDocsPerSplit = 12500;

    List<ObjectId> communityIds = community_ids.stream().map(s -> new ObjectId(s)).collect(Collectors.toList());

    //C/P code:

    //add communities to query if this is not a custom table
    BasicDBObject oldQueryObj = null;
    BasicDBObject srcTags = null;
    // Start with the old query:
    if (query.startsWith("{")) {
        oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query);
    } else {
        oldQueryObj = new BasicDBObject();
    }
    boolean elasticsearchQuery = oldQueryObj.containsField("qt") && !isCustomTable;
    @SuppressWarnings("unused")
    int nLimit = 0;
    if (oldQueryObj.containsField(":limit")) {
        nLimit = oldQueryObj.getInt(":limit");
        oldQueryObj.remove(":limit");
    }
    if (oldQueryObj.containsField(":splits")) {
        nSplits = oldQueryObj.getInt(":splits");
        oldQueryObj.remove(":splits");
    }
    if (oldQueryObj.containsField(":srctags")) {
        srcTags = new BasicDBObject(SourcePojo.tags_, oldQueryObj.get(":srctags"));
        oldQueryObj.remove(":srctags");
    }
    if (bLocalMode) { // If in local mode, then set this to a large number so we always run inside our limit/split version
        // (since for some reason MongoInputFormat seems to fail on large collections)
        nSplits = InfiniteMongoSplitter.MAX_SPLITS;
    }
    if (oldQueryObj.containsField(":docsPerSplit")) {
        nDocsPerSplit = oldQueryObj.getInt(":docsPerSplit");
        oldQueryObj.remove(":docsPerSplit");
    }
    final DBObject fields = (DBObject) oldQueryObj.remove(":fields");
    oldQueryObj.remove(":output");
    oldQueryObj.remove(":reducers");
    @SuppressWarnings("unused")
    String mapperKeyClass = oldQueryObj.getString(":mapper_key_class", "");
    @SuppressWarnings("unused")
    String mapperValueClass = oldQueryObj.getString(":mapper_value_class", "");
    oldQueryObj.remove(":mapper_key_class");
    oldQueryObj.remove(":mapper_value_class");
    String cacheList = null;
    Object cacheObj = oldQueryObj.get(":caches");
    if (null != cacheObj) {
        cacheList = cacheObj.toString(); // (either array of strings, or single string)
        if (!cacheList.startsWith("[")) {
            cacheList = "[" + cacheList + "]"; // ("must" now be valid array)
        }
        oldQueryObj.remove(":caches");
    } //TESTED

    //      if (null != nDebugLimit) { // (debug mode override)
    //         nLimit = nDebugLimit;
    //      }
    //      boolean tmpIncMode = ( null != incrementalMode) && incrementalMode; 

    @SuppressWarnings("unused")
    String otherCollections = null;
    Date fromOverride = null;
    Date toOverride = null;
    Object fromOverrideObj = oldQueryObj.remove(":tmin");
    Object toOverrideObj = oldQueryObj.remove(":tmax");
    if (null != fromOverrideObj) {
        fromOverride = dateStringFromObject(fromOverrideObj, true);
    }
    if (null != toOverrideObj) {
        toOverride = dateStringFromObject(toOverrideObj, false);
    }

    if (!isCustomTable) {
        if (elasticsearchQuery) {
            oldQueryObj.put("communityIds", communityIds);
            //tmin/tmax not supported - already have that capability as part of the query
        } else {
            if (input.equals("feature.temporal")) {
                if ((null != fromOverride) || (null != toOverride)) {
                    oldQueryObj.put("value.maxTime", createDateRange(fromOverride, toOverride, true));
                } //TESTED
                oldQueryObj.put("_id.c", new BasicDBObject(DbManager.in_, communityIds));
            } else {
                oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds));
                if ((null != fromOverride) || (null != toOverride)) {
                    oldQueryObj.put(JsonUtils._ID, createDateRange(fromOverride, toOverride, false));
                } //TESTED         
                if (input.equals("doc_metadata.metadata")) {
                    oldQueryObj.put(DocumentPojo.index_, new BasicDBObject(DbManager.ne_, "?DEL?")); // (ensures not soft-deleted)
                }
            }
        }
    } else {
        throw new RuntimeException("Custom Tables not currently supported (no plans to)");
        //         if ((null != fromOverride) || (null != toOverride)) {
        //            oldQueryObj.put(JsonUtils._ID, createDateRange(fromOverride, toOverride, false));
        //         }//TESTED
        //         //get the custom table (and database)
        //
        //         String[] candidateInputs = input.split("\\s*,\\s*");
        //         input = CustomOutputManager.getCustomDbAndCollection(candidateInputs[0]);
        //         if (candidateInputs.length > 1) {            
        //            otherCollections = Arrays.stream(candidateInputs)
        //                  .skip(1L)
        //                  .map(i -> CustomOutputManager.getCustomDbAndCollection(i))
        //                  .map(i -> "mongodb://"+dbserver+"/"+i).collect(Collectors.joining("|"));
        //         }
    }
    out_query = oldQueryObj.toString();

    return Tuples._4T(out_query, Tuples._2T(nSplits, nDocsPerSplit), srcTags, fields);
}

From source file: com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine.java

License: Open Source License

@Override
public void postQueryActivities(ObjectId queryId, List<BasicDBObject> docs, ResponsePojo response) {
    boolean grabbedScores = false;
    double aggregateSignif = 100.0;
    double queryRelevance = 100.0;
    double score = 100.0;

    if (null != _asyncRequestsPerQuery) {
        int added = 0;
        BasicDBList bsonArray = new BasicDBList();
        PeekingIterator<FederatedRequest> it = Iterators.peekingIterator(_asyncRequestsPerQuery.iterator());
        while (it.hasNext()) {
            // loop state:
            BasicDBObject[] docOrDocs = new BasicDBObject[1];
            docOrDocs[0] = null;

            FederatedRequest request = it.next();
            boolean isComplexSource = isComplexSource(request.endpointInfo.parentSource);
            if (null == request.cachedDoc) { // no cached doc, simple source processing (OR ANY COMPLEX CASE BY CONSTRUCTION)
                try {
                    if ((null == request.cachedResult) || isComplexSource) { // no cached api response, or complex         
                        if (null != request.importThread) {
                            // 1) wait for the thread to finish
                            if (null == request.endpointInfo.queryTimeout_secs) {
                                request.endpointInfo.queryTimeout_secs = 300;
                            }
                            for (int timer = 0; timer < request.endpointInfo.queryTimeout_secs; timer++) {
                                try {
                                    request.importThread.join(1000L);
                                    if (!request.importThread.isAlive()) {
                                        break;
                                    }
                                } //TESTED (by hand)
                                catch (Exception e) {
                                    //(carry on)
                                }
                            }
                            if (request.importThread.isAlive()) {
                                request.errorMessage = new RuntimeException("Script timed out");
                            } //TESTED (by hand)

                            // 2) Get the results
                            if (null != request.errorMessage) {
                                if (_testMode) {
                                    throw new RuntimeException(request.errorMessage);
                                }
                            } else if (isComplexSource) {
                                //DEBUG 
                                if (_DEBUG)
                                    _logger.debug("DEB: postQA0: " + request.complexSourceProcResults.size());

                                handleComplexDocCaching(request, _cacheMode, _scoreStats);

                                // Get a list of docs
                                docOrDocs = ((BasicDBList) DocumentPojo
                                        .listToDb(request.complexSourceProcResults, DocumentPojo.listType()))
                                                .toArray(new BasicDBObject[0]);

                                // (_API_ caching is exactly the same between cache and non-cache cases)
                                // (note that if null != complexSourceProcResults then follows that null != scriptResult)
                                String url = buildScriptUrl(request.mergeKey, request.queryIndex);

                                if (!(request.importThread instanceof FederatedSimpleHarvest) && _cacheMode) { // (don't cache python federated queries in test mode)
                                    // (simple harvest caching is done separately)
                                    this.cacheApiResponse(url, request.scriptResult, request.endpointInfo);
                                }
                            } //TESTED (by hand - single and multiple doc mode)               
                            else if (null == request.scriptResult) {
                                if (_testMode) {
                                    throw new RuntimeException("Script mode: no cached result found from: "
                                            + request.requestParameter);
                                }
                            } else {
                                // (_API_ caching is exactly the same between cache and non-cache cases)
                                String url = buildScriptUrl(request.mergeKey, request.queryIndex);
                                if (_cacheMode) { // (don't cache python federated queries in test mode)
                                    this.cacheApiResponse(url, request.scriptResult, request.endpointInfo);
                                }
                                bsonArray.add(request.scriptResult);
                            }
                        } // end script mode
                        else { // HTTP mode (also: must be simple source builder)
                            Response endpointResponse = request.responseFuture.get();
                            request.asyncClient.close();
                            request.asyncClient = null;

                            String jsonStr = endpointResponse.getResponseBody();
                            String url = endpointResponse.getUri().toURL().toString();

                            Object bsonUnknownType = com.mongodb.util.JSON.parse(jsonStr);
                            BasicDBObject bson = null;
                            if (bsonUnknownType instanceof BasicDBObject) {
                                bson = (BasicDBObject) bsonUnknownType;
                            } else if (bsonUnknownType instanceof BasicDBList) {
                                bson = new BasicDBObject(SimpleFederatedCache.array_, bsonUnknownType);
                            } else if (bsonUnknownType instanceof String) {
                                bson = new BasicDBObject(SimpleFederatedCache.value_, bsonUnknownType);
                            }

                            //DEBUG
                            if (_DEBUG)
                                _logger.debug("DEB: postQA1: " + url + ": " + jsonStr);

                            if (null != bson) {
                                MongoDbUtil.enforceTypeNamingPolicy(bson, 0);
                                this.cacheApiResponse(url, bson, request.endpointInfo);
                                bsonArray.add(bson);
                            }
                        } //(end script vs request method)
                    } //TESTED (3.1, 4.2)
                    else { // (just used cached value)
                        //DEBUG 
                        if (_DEBUG)
                            _logger.debug("DEB: postQA2: " + request.cachedResult.toString());

                        bsonArray.add(
                                (BasicDBObject) request.cachedResult.get(SimpleFederatedCache.cachedJson_));
                    } //TESTED (4.1, 4.3)
                } catch (Exception e) {
                    //DEBUG
                    if (null == request.subRequest) {
                        _logger.error("Error with script: " + e.getMessage());
                        if (_testMode) {
                            throw new RuntimeException("Error with script: " + e.getMessage(), e);
                        }
                    } else {
                        _logger.error("Error with " + request.subRequest.endPointUrl + ": " + e.getMessage());
                        if (_testMode) {
                            throw new RuntimeException(
                                    "Error with " + request.subRequest.endPointUrl + ": " + e.getMessage(), e);
                        }
                    }
                }

                if (null == docOrDocs[0]) {
                    // (this next bit of logic can only occur in simple source cases by construction, phew)
                    if (!it.hasNext() || (request.mergeKey != it.peek().mergeKey)) { // deliberate ptr arithmetic
                        String url = buildScriptUrl(request.mergeKey, request.queryIndex);

                        //DEBUG
                        if (_DEBUG)
                            _logger.debug("DEB: postQA3: " + url + ": " + bsonArray);

                        docOrDocs[0] = createDocFromJson(bsonArray, url, request, request.endpointInfo);
                    }
                }
            } // (end if no cached doc)
            else { // cached doc, bypass lots of processing because no merging and doc already built (simple source processing)
                docOrDocs[0] = request.cachedDoc;
            } //TESTED (by hand)

            if (null != docOrDocs[0])
                for (BasicDBObject doc : docOrDocs) {

                    // Cache the document unless already cached (or caching disabled)
                    if ((null == request.cachedDoc) && _cacheMode && !isComplexSource
                            && ((null == request.endpointInfo.cacheTime_days)
                                    || (request.endpointInfo.cacheTime_days >= 0))) {
                        simpleDocCache(request, doc);
                    } //TESTED (by hand, 3 cases: cached not expired, cached expired first time, cached expired multiple times)

                    if (!grabbedScores) {
                        if (!docs.isEmpty()) {
                            BasicDBObject topDoc = docs.get(0);
                            aggregateSignif = topDoc.getDouble(DocumentPojo.aggregateSignif_, aggregateSignif);
                            queryRelevance = topDoc.getDouble(DocumentPojo.queryRelevance_, queryRelevance);
                            score = topDoc.getDouble(DocumentPojo.score_, score);
                            grabbedScores = true;

                            // OK would also like to grab the original matching entity, if it exists
                            if (!isComplexSource) {
                                BasicDBList ents = (BasicDBList) topDoc.get(DocumentPojo.entities_);
                                if (null != ents) {
                                    for (Object entObj : ents) {
                                        BasicDBObject ent = (BasicDBObject) entObj;
                                        String entIndex = ent.getString(EntityPojo.index_, "");
                                        if (entIndex.equals(request.queryIndex)) {
                                            ents = (BasicDBList) doc.get(DocumentPojo.entities_);
                                            if (null != ents) {
                                                ents.add(ent);
                                            }
                                            break;
                                        }
                                    }
                                } //TESTED (by hand)
                            }
                        }
                    }
                    doc.put(DocumentPojo.aggregateSignif_, aggregateSignif);
                    doc.put(DocumentPojo.queryRelevance_, queryRelevance);
                    doc.put(DocumentPojo.score_, score);

                    // Swap id and updateId, everything's been cached now:
                    // Handle update ids vs normal ids:
                    ObjectId updateId = (ObjectId) doc.get(DocumentPojo.updateId_);
                    if (null != updateId) { // swap the 2...
                        doc.put(DocumentPojo.updateId_, doc.get(DocumentPojo._id_));
                        doc.put(DocumentPojo._id_, updateId);
                    } //TESTED (by hand)            

                    // If we're returning to a query then we'll adjust the doc format (some of the atomic fields become arrays)
                    if (!_testMode) {
                        convertDocToQueryFormat(doc, request.communityIdStrs);
                    } //TESTED (by hand)

                    docs.add(0, doc);
                    added++;
                    //(doc auto reset at top of loop)

                    //(end if built a doc from the last request/set of requests)
                } //TESTED (3.1)      

        } //(end loop over federated requests)

        if (null != response.getStats()) {
            response.getStats().found += added;
        } //TESTED (by hand)         
    }
}

From source file: com.ikanow.infinit.e.core.utils.SourceUtils.java

License: Open Source License

private static boolean updateHarvestDistributionState_tokenComplete(SourcePojo source,
        HarvestEnum harvestStatus, BasicDBObject incClause, BasicDBObject setClause) {

    // Update tokens complete, and retrieve modified version 
    int nTokensToBeCleared = source.getDistributionTokens().size();
    BasicDBObject query = new BasicDBObject(SourcePojo._id_, source.getId());
    BasicDBObject modify = new BasicDBObject(MongoDbManager.inc_, new BasicDBObject(
            SourceHarvestStatusPojo.sourceQuery_distributionTokensComplete_, nTokensToBeCleared));
    BasicDBObject fields = new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_distributionTokensComplete_,
            1);
    fields.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_, 1);
    fields.put(SourceHarvestStatusPojo.sourceQuery_distributionReachedLimit_, 1);
    BasicDBObject partial = (BasicDBObject) MongoDbManager.getIngest().getSource().findAndModify(query, fields,
            null, false, modify, true, false);
    //(return new version - ensures previous increments have been taken into account)

    // Two cases: source complete (all tokens obtained), source incomplete:

    if (null != partial) { // (else yikes!)
        BasicDBObject partialStatus = (BasicDBObject) partial.get(SourcePojo.harvest_);
        if (null != partialStatus) { // (else yikes!)
            int nTokensComplete = partialStatus.getInt(SourceHarvestStatusPojo.distributionTokensComplete_, 0);
            // (note after increment)

            // COMPLETE: reset parameters, status -> error (if anything has errored), success (all done), success_iteration (more to do)

            if (nTokensComplete == source.getDistributionFactor()) {
                if (!source.reachedMaxDocs()) { // (Can only do this if we've finished the source...
                    //...else the different threads can be at different points, so the most recent doc for one thread might be
                    // before the most recent doc of another)
                    setClause.put(SourceHarvestStatusPojo.sourceQuery_distributedLastCompletedCycle_,
                            new Date());
                }

                setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionTokensComplete_, 0);
                setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionTokensFree_,
                        source.getDistributionFactor());
                setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionReachedLimit_, false); // (resetting this)
                // This source is now complete
                String status = partialStatus.getString(SourceHarvestStatusPojo.harvest_status_, null);
                Boolean reachedLimit = partialStatus.getBoolean(
                        SourceHarvestStatusPojo.distributionReachedLimit_, false) || source.reachedMaxDocs();

                if ((null != status) && ((status.equalsIgnoreCase(HarvestEnum.error.toString())
                        || (HarvestEnum.error == harvestStatus)))) {
                    setClause.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_,
                            HarvestEnum.error.toString());
                } //TESTED (current and previous state == error)
                else if (reachedLimit || (HarvestEnum.success_iteration == harvestStatus)) {

                    setClause.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_,
                            HarvestEnum.success_iteration.toString());
                } //TESTED (from previous or current state)

                // (else leave with default of success)

                //DEBUG
                //System.out.println(Thread.currentThread().getName() + " COMPLETE_SRC COMPLETE_TOKEN=" + source.getKey() + " / " + setClause.toString() + " / " + incClause.toString() + " / " + nTokensComplete);

                return true;

            } //TESTED
            else { // Not complete

                // If we're here then we're only allowed to update the status to error
                if (HarvestEnum.error != harvestStatus) {
                    setClause.remove(SourceHarvestStatusPojo.sourceQuery_harvest_status_);
                } //TESTED
                if (source.reachedMaxDocs()) {
                    setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionReachedLimit_, true);
                } //TESTED

                //DEBUG
                //System.out.println(Thread.currentThread().getName() + " COMPLETE_TOKEN=" + source.getKey() + " / " + setClause.toString() + " / " + incClause.toString() + " / " + nTokensComplete);

                return false;

            } //(end is complete or not)
              //TESTED (reached max limit)

        } //(end found partial source status, else catastrophic failure)
    } //(end found partial source, else catastrophic failure)

    return false;

}

From source file: com.ikanow.infinit.e.harvest.extraction.document.logstash.LogstashHarvester.java

License: Open Source License

@Override
public void executeHarvest(HarvestContext context, SourcePojo source, List<DocumentPojo> toAdd,
        List<DocumentPojo> toUpdate, List<DocumentPojo> toRemove) {

    if (ElasticSearchManager.getVersion() < 100) {
        context.getHarvestStatus().update(source, new Date(), HarvestEnum.error,
                "This version of infinit.e (elasticsearch version < 1.0) does not support logstash, you will need to upgrade to v0.3 and ensure your elasticsearch instance is upgraded.",
                true, false);
        return;
    }

    if (context.isStandalone()) { // test mode

        // Get the configuration

        String logStashConfig = null;
        SourcePipelinePojo logstashElement = null;
        for (SourcePipelinePojo pxPipe : source.getProcessingPipeline()) { /// (must be non null if here)
            if (null != pxPipe.logstash) {
                logStashConfig = pxPipe.logstash.config;
                logstashElement = pxPipe;
            }
            break;
        }
        if ((null == logStashConfig) || logStashConfig.isEmpty()) {
            context.getHarvestStatus().update(source, new Date(), HarvestEnum.error,
                    "Logstash internal logic error, no configuration", true, false);
            return;
        }

        // Admin check (passed down)

        boolean isAdmin = AuthUtils.isAdmin(source.getOwnerId());

        // Perform the request

        ObjectId requestId = new ObjectId();
        BasicDBObject logQ = new BasicDBObject("_id", requestId);
        boolean removeJobWhenDone = true;

        try {

            // (See records.service for the programmatic definition of this message)
            logstashElement.logstash.config = logStashConfig;
            BasicDBObject logStashDbo = (BasicDBObject) logstashElement.toDb();
            logStashDbo.put("_id", requestId);
            logStashDbo.put("maxDocs", context.getStandaloneMaxDocs());
            logStashDbo.put("sourceKey", source.getKey());
            logStashDbo.put("isAdmin", isAdmin);

            // Step 0: place request on Q
            DbManager.getIngest().getLogHarvesterQ().save(logStashDbo);

            // Step 1: has my request been serviced:
            boolean serviced = false;
            String error = null;

            final int WAIT_TIME_2_MINS = 120;
            for (int time = 0; time < WAIT_TIME_2_MINS; time += 5) { // (allow 2 minutes for servicing)
                //1. have i been removed from queue?
                //2. check size of logstash queue - is it decreasing
                try {
                    Thread.sleep(5000); // check every 5s
                    logStashDbo = (BasicDBObject) DbManager.getIngest().getLogHarvesterQ().findOne(logQ);
                    if (null == logStashDbo) {
                        removeJobWhenDone = false;
                        serviced = true;
                        break; // found!
                    } //TESTED
                    error = logStashDbo.getString("error", null);
                    if (null != error) {
                        break; // bad!
                    } //TESTED
                } catch (Exception e) {
                }
            }
            if (!serviced) {
                DbManager.getIngest().getLogHarvesterQ().remove(logQ);
                removeJobWhenDone = false;

                if (null == error) {
                    context.getHarvestStatus().update(source, new Date(), HarvestEnum.error,
                            "Logstash service appears not to be running", true, false);
                } else {
                    context.getHarvestStatus().update(source, new Date(), HarvestEnum.error,
                            "Logstash service reports error: " + error, true, false);
                } //TESTED
                return;
            } //TESTED

            // Step 2: get data from the queue
            final int WAIT_TIME_5_MINS = 300;
            for (int time = 0; time < WAIT_TIME_5_MINS; time += 5) { // (allow 5 minutes for processing)

                logStashDbo = (BasicDBObject) DbManager.getIngest().getLogHarvesterQ().findOne(logQ);
                if (null != logStashDbo) { // if it reappears then there's been an error so handle and exit
                    DbManager.getIngest().getLogHarvesterQ().remove(logQ);
                    removeJobWhenDone = false;

                    long count = DbManager.getCollection("ingest", requestId.toString()).count();
                    if (count > 0) {
                        DBCursor dbc = DbManager.getCollection("ingest", requestId.toString()).find()
                                .limit(context.getStandaloneMaxDocs());
                        for (Object o : dbc) {
                            DocumentPojo doc = new DocumentPojo();
                            doc.addToMetadata("record", o);
                            toAdd.add(doc);
                        }
                        error = logStashDbo.getString("error", "no info");
                        context.getHarvestStatus().update(source, new Date(), HarvestEnum.success,
                                "Logstash service info: " + error, false, false);
                        break;
                    } //TESTED
                    else { // Then it's an error:
                        error = logStashDbo.getString("error", null);

                        if (error == null) {
                            if (0 == context.getStandaloneMaxDocs()) {
                                context.getHarvestStatus().update(source, new Date(), HarvestEnum.success,
                                        "Logstash service info: success", false, false);
                                break;
                            } else {
                                error = "unknown error";
                            }
                        } //TESTED

                        context.getHarvestStatus().update(source, new Date(), HarvestEnum.error,
                                "Logstash service reports error: " + error, true, false);
                        return;
                    } //TESTED

                } //TESTED
                try {
                    Thread.sleep(5000); // check every 5s
                } catch (Exception e) {
                }

            } // (end loop while waiting for docs)
        } finally { // just to be on the safe side...
            if (removeJobWhenDone) {
                DbManager.getIngest().getLogHarvesterQ().remove(logQ);
            }
            try {
                DbManager.getCollection("ingest", requestId.toString()).drop();
            } catch (Exception e) {
            } // that's fine it just doesn't exist
        }
    } else {
        context.getHarvestStatus().update(source, new Date(), HarvestEnum.error,
                "Tried to harvest logstash data internally", true, false);
        return;
    }
}

From source file: com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java

License: Open Source License

@SuppressWarnings({ "unchecked", "rawtypes" })
public String runHadoopJob(CustomMapReduceJobPojo job, String tempJarLocation)
        throws IOException, SAXException, ParserConfigurationException {
    StringWriter xml = new StringWriter();
    String outputCollection = job.outputCollectionTemp;// (non-append mode) 
    if ((null != job.appendResults) && job.appendResults)
        outputCollection = job.outputCollection; // (append mode, write directly in....)
    else if (null != job.incrementalMode)
        job.incrementalMode = false; // (not allowed to be in incremental mode and not update mode)

    createConfigXML(xml, job.jobtitle, job.inputCollection,
            InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.INPUTFIELDS),
            job.isCustomTable, job.getOutputDatabase(), job._id.toString(), outputCollection, job.mapper,
            job.reducer, job.combiner,
            InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.QUERY),
            job.communityIds, job.outputKey, job.outputValue, job.arguments, job.incrementalMode,
            job.submitterID, job.selfMerge, job.outputCollection, job.appendResults);

    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();

    URLClassLoader child = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
            savedClassLoader);
    Thread.currentThread().setContextClassLoader(child);

    // Check version: for now, any infinit.e.data_model with an VersionTest class is acceptable
    boolean dataModelLoaded = true;
    try {
        URLClassLoader versionTest = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
                null);
        try {
            Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, versionTest);
        } catch (ClassNotFoundException e2) {
            //(this is fine, will use the cached version)
            dataModelLoaded = false;
        }
        if (dataModelLoaded)
            Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoVersionTest", true, versionTest);
    } catch (ClassNotFoundException e1) {
        throw new RuntimeException(
                "This JAR is compiled with too old a version of the data-model, please recompile with Jan 2014 (rc2) onwards");
    }

    // Now load the XML into a configuration object: 
    Configuration config = new Configuration();
    // Add the client configuration overrides:
    if (!bLocalMode) {
        String hadoopConfigPath = props_custom.getHadoopConfigPath() + "/hadoop/";
        config.addResource(new Path(hadoopConfigPath + "core-site.xml"));
        config.addResource(new Path(hadoopConfigPath + "mapred-site.xml"));
        config.addResource(new Path(hadoopConfigPath + "hadoop-site.xml"));
    } //TESTED

    try {
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(new ByteArrayInputStream(xml.toString().getBytes()));
        NodeList nList = doc.getElementsByTagName("property");

        for (int temp = 0; temp < nList.getLength(); temp++) {
            Node nNode = nList.item(temp);
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                Element eElement = (Element) nNode;
                String name = getTagValue("name", eElement);
                String value = getTagValue("value", eElement);
                if ((null != name) && (null != value)) {
                    config.set(name, value);
                }
            }
        }
    } catch (Exception e) {
        throw new IOException(e.getMessage());
    }

    // Some other config defaults:
    // (not sure if these are actually applied, or derived from the defaults - for some reason they don't appear in CDH's client config)
    config.set("mapred.map.tasks.speculative.execution", "false");
    config.set("mapred.reduce.tasks.speculative.execution", "false");
    // (default security is ignored here, have it set via HADOOP_TASKTRACKER_CONF in cloudera)

    // Now run the JAR file
    try {
        BasicDBObject advancedConfigurationDbo = null;
        try {
            advancedConfigurationDbo = (null != job.query)
                    ? ((BasicDBObject) com.mongodb.util.JSON.parse(job.query))
                    : (new BasicDBObject());
        } catch (Exception e) {
            advancedConfigurationDbo = new BasicDBObject();
        }
        boolean esMode = advancedConfigurationDbo.containsField("qt") && !job.isCustomTable;
        if (esMode && !job.inputCollection.equals("doc_metadata.metadata")) {
            throw new RuntimeException(
                    "Infinit.e Queries are only supported on doc_metadata - use MongoDB queries instead.");
        }

        config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)
        if (bLocalMode) { // local job tracker and FS mode
            config.set("mapred.job.tracker", "local");
            config.set("fs.default.name", "local");
        } else {
            if (bTestMode) { // run job tracker locally but FS mode remotely
                config.set("mapred.job.tracker", "local");
            } else { // normal job tracker
                String trackerUrl = HadoopUtils.getXMLProperty(
                        props_custom.getHadoopConfigPath() + "/hadoop/mapred-site.xml", "mapred.job.tracker");
                config.set("mapred.job.tracker", trackerUrl);
            }
            String fsUrl = HadoopUtils.getXMLProperty(
                    props_custom.getHadoopConfigPath() + "/hadoop/core-site.xml", "fs.default.name");
            config.set("fs.default.name", fsUrl);
        }
        if (!dataModelLoaded && !(bTestMode || bLocalMode)) { // If running distributed and no data model loaded then add ourselves
            Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/",
                    "infinit.e.data_model.jar", config);
            DistributedCache.addFileToClassPath(jarToCache, config);
            jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/",
                    "infinit.e.processing.custom.library.jar", config);
            DistributedCache.addFileToClassPath(jarToCache, config);
        } //TESTED

        // Debug scripts (only if they exist), and only in non local/test mode
        if (!bLocalMode && !bTestMode) {

            try {
                Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/",
                        "custom_map_error_handler.sh", config);
                config.set("mapred.map.task.debug.script", "custom_map_error_handler.sh " + job.jobtitle);
                config.set("mapreduce.map.debug.script", "custom_map_error_handler.sh " + job.jobtitle);
                DistributedCache.createSymlink(config);
                DistributedCache.addCacheFile(scriptToCache.toUri(), config);
            } catch (Exception e) {
            } // just carry on

            try {
                Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/",
                        "custom_reduce_error_handler.sh", config);
                config.set("mapred.reduce.task.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle);
                config.set("mapreduce.reduce.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle);
                DistributedCache.createSymlink(config);
                DistributedCache.addCacheFile(scriptToCache.toUri(), config);
            } catch (Exception e) {
            } // just carry on

        } //TODO (???): TOTEST

        // (need to do these 2 things here before the job is created, at which point the config class has been copied across)
        //1)
        Class<?> mapperClazz = Class.forName(job.mapper, true, child);
        if (ICustomInfiniteInternalEngine.class.isAssignableFrom(mapperClazz)) { // Special case: internal custom engine, so gets an additional integration hook
            ICustomInfiniteInternalEngine preActivities = (ICustomInfiniteInternalEngine) mapperClazz
                    .newInstance();
            preActivities.preTaskActivities(job._id, job.communityIds, config, !(bTestMode || bLocalMode));
        } //TESTED
          //2)
        if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) {
            // Need to download the GridFSZip file
            try {
                Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/unbundled/",
                        "GridFSZipFile.jar", config);
                DistributedCache.addFileToClassPath(jarToCache, config);
            } catch (Throwable t) {
            } // (this is fine, will already be on the classpath .. otherwise lots of other stuff will be failing all over the place!)            
        }

        if (job.inputCollection.equals("records")) {

            InfiniteElasticsearchHadoopUtils.handleElasticsearchInput(job, config, advancedConfigurationDbo);

            //(won't run under 0.19 so running with "records" should cause all sorts of exceptions)

        } //TESTED (by hand)         

        if (bTestMode || bLocalMode) { // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place
            config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
        }

        // Manually specified caches
        List<URL> localJarCaches = InfiniteHadoopUtils.handleCacheList(advancedConfigurationDbo.get("$caches"),
                job, config, props_custom);

        Job hj = new Job(config); // (NOTE: from here, changes to config are ignored)
        try {

            if (null != localJarCaches) {
                if (bLocalMode || bTestMode) {
                    Method method = URLClassLoader.class.getDeclaredMethod("addURL", new Class[] { URL.class });
                    method.setAccessible(true);
                    method.invoke(child, localJarCaches.toArray());

                } //TOTEST (tested logically)
            }
            Class<?> classToLoad = Class.forName(job.mapper, true, child);
            hj.setJarByClass(classToLoad);

            if (job.inputCollection.equalsIgnoreCase("filesystem")) {
                String inputPath = null;
                try {
                    inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url");
                    if (!inputPath.endsWith("/")) {
                        inputPath = inputPath + "/";
                    }
                } catch (Exception e) {
                }
                if (null == inputPath) {
                    throw new RuntimeException("Must specify 'file.url' if reading from filesystem.");
                }
                inputPath = InfiniteHadoopUtils.authenticateInputDirectory(job, inputPath);

                InfiniteFileInputFormat.addInputPath(hj, new Path(inputPath + "*/*")); // (that extra bit makes it recursive)
                InfiniteFileInputFormat.setMaxInputSplitSize(hj, 33554432); // (32MB)
                InfiniteFileInputFormat.setInfiniteInputPathFilter(hj, config);
                hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                        "com.ikanow.infinit.e.data_model.custom.InfiniteFileInputFormat", true, child));
            } else if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) {

                String[] oidStrs = null;
                try {
                    String inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url");
                    Pattern oidExtractor = Pattern.compile("inf://share/([^/]+)");
                    Matcher m = oidExtractor.matcher(inputPath);
                    if (m.find()) {
                        oidStrs = m.group(1).split("\\s*,\\s*");

                    } else {
                        throw new RuntimeException(
                                "file.url must be in format inf://share/<oid-list>/<string>: " + inputPath);
                    }
                    InfiniteHadoopUtils.authenticateShareList(job, oidStrs);
                } catch (Exception e) {
                    throw new RuntimeException(
                            "Authentication error: " + e.getMessage() + ": " + advancedConfigurationDbo, e);
                }

                hj.getConfiguration().setStrings("mapred.input.dir", oidStrs);
                hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                        "com.ikanow.infinit.e.data_model.custom.InfiniteShareInputFormat", true, child));
            } else if (job.inputCollection.equals("records")) {
                hj.setInputFormatClass((Class<? extends InputFormat>) Class
                        .forName("com.ikanow.infinit.e.data_model.custom.InfiniteEsInputFormat", true, child));
            } else {
                if (esMode) {
                    hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                            "com.ikanow.infinit.e.processing.custom.utils.InfiniteElasticsearchMongoInputFormat",
                            true, child));
                } else {
                    hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                            "com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, child));
                }
            }
            if ((null != job.exportToHdfs) && job.exportToHdfs) {

                //TODO (INF-2469): Also, if the output key is BSON then also run as text (but output as JSON?)

                Path outPath = InfiniteHadoopUtils.ensureOutputDirectory(job, props_custom);

                if ((null != job.outputKey) && (null != job.outputValue)
                        && job.outputKey.equalsIgnoreCase("org.apache.hadoop.io.text")
                        && job.outputValue.equalsIgnoreCase("org.apache.hadoop.io.text")) {
                    // (slight hack before I sort out the horrendous job class - if key/val both text and exporting to HDFS then output as Text)
                    hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
                            .forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat", true, child));
                    TextOutputFormat.setOutputPath(hj, outPath);
                } //TESTED
                else {
                    hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName(
                            "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", true, child));
                    SequenceFileOutputFormat.setOutputPath(hj, outPath);
                } //TESTED
            } else { // normal case, stays in MongoDB
                hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName(
                        "com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat", true, child));
            }
            hj.setMapperClass((Class<? extends Mapper>) mapperClazz);
            String mapperOutputKeyOverride = advancedConfigurationDbo.getString("$mapper_key_class", null);
            if (null != mapperOutputKeyOverride) {
                hj.setMapOutputKeyClass(Class.forName(mapperOutputKeyOverride));
            } //TESTED 

            String mapperOutputValueOverride = advancedConfigurationDbo.getString("$mapper_value_class", null);
            if (null != mapperOutputValueOverride) {
                hj.setMapOutputValueClass(Class.forName(mapperOutputValueOverride));
            } //TESTED 

            if ((null != job.reducer) && !job.reducer.startsWith("#") && !job.reducer.equalsIgnoreCase("null")
                    && !job.reducer.equalsIgnoreCase("none")) {
                hj.setReducerClass((Class<? extends Reducer>) Class.forName(job.reducer, true, child));
                // Variable reducers:
                if (null != job.query) {
                    try {
                        hj.setNumReduceTasks(advancedConfigurationDbo.getInt("$reducers", 1));
                    } catch (Exception e) {
                        try {
                            // (just check it's not a string that is a valid int)
                            hj.setNumReduceTasks(
                                    Integer.parseInt(advancedConfigurationDbo.getString("$reducers", "1")));
                        } catch (Exception e2) {
                        }
                    }
                } //TESTED
            } else {
                hj.setNumReduceTasks(0);
            }
            if ((null != job.combiner) && !job.combiner.startsWith("#")
                    && !job.combiner.equalsIgnoreCase("null") && !job.combiner.equalsIgnoreCase("none")) {
                hj.setCombinerClass((Class<? extends Reducer>) Class.forName(job.combiner, true, child));
            }
            hj.setOutputKeyClass(Class.forName(job.outputKey, true, child));
            hj.setOutputValueClass(Class.forName(job.outputValue, true, child));

            hj.setJobName(job.jobtitle);
            currJobName = job.jobtitle;
        } catch (Error e) { // (messing about with class loaders = lots of chances for errors!)
            throw new RuntimeException(e.getMessage(), e);
        }
        if (bTestMode || bLocalMode) {
            hj.submit();
            currThreadId = null;
            Logger.getRootLogger().addAppender(this);
            currLocalJobId = hj.getJobID().toString();
            currLocalJobErrs.setLength(0);
            while (!hj.isComplete()) {
                Thread.sleep(1000);
            }
            Logger.getRootLogger().removeAppender(this);
            if (hj.isSuccessful()) {
                if (this.currLocalJobErrs.length() > 0) {
                    return "local_done: " + this.currLocalJobErrs.toString();
                } else {
                    return "local_done";
                }
            } else {
                return "Error: " + this.currLocalJobErrs.toString();
            }
        } else {
            hj.submit();
            String jobId = hj.getJobID().toString();
            return jobId;
        }
    } catch (Exception e) {
        e.printStackTrace();
        Thread.currentThread().setContextClassLoader(savedClassLoader);
        return "Error: " + InfiniteHadoopUtils.createExceptionMessage(e);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}

From source file: com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java

License: Open Source License

private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable,
        String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer,
        String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue,
        String arguments, Boolean incrementalMode, ObjectId userId, Boolean selfMerge,
        String originalOutputCollection, Boolean appendResults) throws IOException {
    String dbserver = prop_general.getDatabaseServer();
    output = outputDatabase + "." + tempOutputCollection;

    boolean isAdmin = AuthUtils.isAdmin(userId);

    int nSplits = 8;
    int nDocsPerSplit = 12500;

    //add communities to query if this is not a custom table
    BasicDBObject oldQueryObj = null;
    BasicDBObject srcTags = null;
    // Start with the old query:
    if (query.startsWith("{")) {
        oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query);
    } else {
        oldQueryObj = new BasicDBObject();
    }
    boolean elasticsearchQuery = oldQueryObj.containsField("qt") && !isCustomTable;
    int nLimit = 0;
    if (oldQueryObj.containsField("$limit")) {
        nLimit = oldQueryObj.getInt("$limit");
        oldQueryObj.remove("$limit");
    }
    if (oldQueryObj.containsField("$splits")) {
        nSplits = oldQueryObj.getInt("$splits");
        oldQueryObj.remove("$splits");
    }
    if (oldQueryObj.containsField("$srctags")) {
        srcTags = new BasicDBObject(SourcePojo.tags_, oldQueryObj.get("$srctags"));
        oldQueryObj.remove("$srctags");
    }
    if (bLocalMode) { // If in local mode, then set this to a large number so we always run inside our limit/split version
        // (since for some reason MongoInputFormat seems to fail on large collections)
        nSplits = InfiniteMongoSplitter.MAX_SPLITS;
    }
    if (oldQueryObj.containsField("$docsPerSplit")) {
        nDocsPerSplit = oldQueryObj.getInt("$docsPerSplit");
        oldQueryObj.remove("$docsPerSplit");
    }
    oldQueryObj.remove("$fields");
    oldQueryObj.remove("$output");
    oldQueryObj.remove("$reducers");
    String mapperKeyClass = oldQueryObj.getString("$mapper_key_class", "");
    String mapperValueClass = oldQueryObj.getString("$mapper_value_class", "");
    oldQueryObj.remove("$mapper_key_class");
    oldQueryObj.remove("$mapper_value_class");
    String cacheList = null;
    Object cacheObj = oldQueryObj.get("$caches");
    if (null != cacheObj) {
        cacheList = cacheObj.toString(); // (either array of strings, or single string)
        if (!cacheList.startsWith("[")) {
            cacheList = "[" + cacheList + "]"; // ("must" now be valid array)
        }
        oldQueryObj.remove("$caches");
    } //TESTED

    if (null != nDebugLimit) { // (debug mode override)
        nLimit = nDebugLimit;
    }
    boolean tmpIncMode = (null != incrementalMode) && incrementalMode;

    Date fromOverride = null;
    Date toOverride = null;
    Object fromOverrideObj = oldQueryObj.remove("$tmin");
    Object toOverrideObj = oldQueryObj.remove("$tmax");
    if (null != fromOverrideObj) {
        fromOverride = InfiniteHadoopUtils.dateStringFromObject(fromOverrideObj, true);
    }
    if (null != toOverrideObj) {
        toOverride = InfiniteHadoopUtils.dateStringFromObject(toOverrideObj, false);
    }

    if (!isCustomTable) {
        if (elasticsearchQuery) {
            oldQueryObj.put("communityIds", communityIds);
            //tmin/tmax not supported - already have that capability as part of the query
        } else {
            if (input.equals("feature.temporal")) {
                if ((null != fromOverride) || (null != toOverride)) {
                    oldQueryObj.put("value.maxTime",
                            InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, true));
                } //TESTED
                oldQueryObj.put("_id.c", new BasicDBObject(DbManager.in_, communityIds));
            } else {
                oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds));
                if ((null != fromOverride) || (null != toOverride)) {
                    oldQueryObj.put("_id",
                            InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false));
                } //TESTED         
                if (input.equals("doc_metadata.metadata")) {
                    oldQueryObj.put(DocumentPojo.index_, new BasicDBObject(DbManager.ne_, "?DEL?")); // (ensures not soft-deleted)
                }
            }
        }
    } else {
        if ((null != fromOverride) || (null != toOverride)) {
            oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false));
        } //TESTED
          //get the custom table (and database)
        input = CustomOutputManager.getCustomDbAndCollection(input);
    }
    query = oldQueryObj.toString();

    if (arguments == null)
        arguments = "";

    // Generic configuration
    out.write("<?xml version=\"1.0\"?>\n<configuration>");

    // Mongo specific configuration
    out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title
            + "</value></property>"
            + "\n\t<property><!-- run the job verbosely ? --><name>mongo.job.verbose</name><value>true</value></property>"
            + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>"
            + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://"
            + dbserver + "/" + input + "</value></property>"
            + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://"
            + dbserver + "/" + output + "</value>  </property>"
            + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>"
            + StringEscapeUtils.escapeXml(out_query) + "</value></property>"
            + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>"
            + ((fields == null) ? ("") : fields) + "</value></property>"
            + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>"
            + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>"
            + nLimit + "</value><!-- 0 == no limit --></property>"
            + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? --><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>"
            + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper
            + "</value></property>"
            + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer
            + "</value></property>"
            + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>"
            + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat</value></property>"
            + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>"
            + outputKey + "</value></property>"
            + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>"
            + outputValue + "</value></property>"
            + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value>"
            + mapperKeyClass + "</value></property>"
            + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value>"
            + mapperValueClass + "</value></property>"
            + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>"
            + combiner + "</value></property>"
            + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>"
            + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>"
            + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>");

    // Infinit.e specific configuration

    out.write("\n\t<property><!-- User Arguments [optional] --><name>infinit.e.userid</name><value>"
            + StringEscapeUtils.escapeXml(userId.toString()) + "</value></property>"
            + "\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>"
            + StringEscapeUtils.escapeXml(arguments) + "</value></property>"
            + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>"
            + nSplits + "</value></property>"
            + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>"
            + nDocsPerSplit + "</value></property>"
            + "\n\t<property><!-- Infinit.e incremental mode [optional] --><name>update.incremental</name><value>"
            + tmpIncMode + "</value></property>"
            + "\n\t<property><!-- Infinit.e quick admin check [optional] --><name>infinit.e.is.admin</name><value>"
            + isAdmin + "</value></property>"
            + "\n\t<property><!-- Infinit.e userid [optional] --><name>infinit.e.userid</name><value>" + userId
            + "</value></property>");
    if (null != cacheList) {
        out.write(
                "\n\t<property><!-- Infinit.e cache list [optional] --><name>infinit.e.cache.list</name><value>"
                        + cacheList + "</value></property>");
    } //TESTED
    if (null != srcTags) {
        out.write(
                "\n\t<property><!-- Infinit.e src tags filter [optional] --><name>infinit.e.source.tags.filter</name><value>"
                        + srcTags.toString() + "</value></property>");
    }

    if (null != selfMerge && selfMerge && originalOutputCollection != null) {
        originalOutputCollection = "mongodb://" + dbserver + "/" + outputDatabase + "."
                + originalOutputCollection;
        out.write(
                "\n\t<property><!-- This jobs output collection for passing into the mapper along with input collection [optional] --><name>infinit.e.selfMerge</name><value>"
                        + originalOutputCollection + "</value></property>");
    }

    // Closing thoughts:
    out.write("\n</configuration>");

    out.flush();
    out.close();
}
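
The fragment above pulls the optional "$mapper_key_class" and "$mapper_value_class" settings out of the query object with getString(key, default), so a missing key simply yields an empty string rather than null. A minimal standalone sketch of that pattern (the class name and sample JSON below are illustrative only, not part of the source above):

import com.mongodb.BasicDBObject;

public class GetStringDefaultExample {
    public static void main(String[] args) {
        // Parse a query object that only sets one of the two optional keys
        BasicDBObject queryObj = (BasicDBObject) com.mongodb.util.JSON
                .parse("{ \"$mapper_key_class\": \"org.apache.hadoop.io.Text\" }");

        // Present key: getString returns the stored value
        String mapperKeyClass = queryObj.getString("$mapper_key_class", "");
        // Missing key: getString returns the supplied default instead of null
        String mapperValueClass = queryObj.getString("$mapper_value_class", "");

        System.out.println("key class   = " + mapperKeyClass);   // org.apache.hadoop.io.Text
        System.out.println("value class = " + mapperValueClass); // (empty string)
    }
}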

From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomSavedQueryQueueLauncher.java

License:Open Source License

public static void createAlertDocSummary(StringBuffer alertEmailText, int docNum, int numDocSummaries,
        BasicDBObject doc, String rootUrl) {
    if (docNum < numDocSummaries) {
        // Preamble on the first doc
        if (0 == docNum) {
            alertEmailText.append("<p>");
            alertEmailText.append("Top ").append(numDocSummaries);
            if (1 == numDocSummaries) {
                alertEmailText.append(" document:");
            } else {
                alertEmailText.append(" documents:");
            }
            alertEmailText.append("</p>");
            alertEmailText.append("\n");
            alertEmailText.append("<ol>");
            alertEmailText.append("\n");
        }
        // Docs:         
        StringBuffer guiQuery = new StringBuffer("{\"qt\":[{\"ftext\":\"_id:")
                .append(doc.getObjectId(DocumentPojo._id_)).append("\"}]}");
        String url = doc.getString(DocumentPojo.displayUrl_, doc.getString(DocumentPojo.url_));
        String title = doc.getString(DocumentPojo.title_, url);
        alertEmailText.append("<li/>");
        alertEmailText.append(title);
        alertEmailText.append(" [");
        alertEmailText.append(doc.getDate(DocumentPojo.publishedDate_, doc.getDate(DocumentPojo.created_)));
        alertEmailText.append("]");
        alertEmailText.append(" (");
        alertEmailText.append("<a href=\"").append(rootUrl);
        try {
            alertEmailText.append("?query=");
            alertEmailText.append(URLEncoder.encode(guiQuery.toString(), "UTF-8"));
            alertEmailText.append("&communityIds=").append(
                    doc.getObjectId(DocumentPojo.communityId_, new ObjectId("4c927585d591d31d7b37097a")));
        } catch (Exception e) {
        } // (just carry on)
        alertEmailText.append("\">");
        alertEmailText.append("GUI</a>)");
        if ((null != url) && (url.startsWith("http"))) {
            alertEmailText.append(" (");
            alertEmailText.append("<a href=\"").append(url).append("\">");
            alertEmailText.append("External Link</a>)");
        }
        alertEmailText.append("\n");
    }
}
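
The summary above chains two getString calls so that a missing displayUrl falls back to the plain url, and a missing title falls back to that url in turn. A minimal standalone sketch of the same chaining, using made-up field names rather than the DocumentPojo constants:

import com.mongodb.BasicDBObject;

public class GetStringFallbackExample {
    public static void main(String[] args) {
        BasicDBObject doc = new BasicDBObject("url", "http://example.com/doc1")
                .append("title", "Example document");

        // "displayUrl" is absent, so the second argument (the plain url) is returned
        String url = doc.getString("displayUrl", doc.getString("url"));
        // "title" is present, so the url fallback is not needed
        String title = doc.getString("title", url);

        System.out.println(title + " (" + url + ")");
    }
}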

From source file:com.ikanow.infinit.e.processing.custom.utils.CustomApiUtils.java

License:Apache License

public static void getJobResults(ResponsePojo rp, CustomMapReduceJobPojo cmr, int limit, String fields,
        String findStr, String sortStr, boolean bCsv) {

    BasicDBObject queryDbo = null;
    if (null != findStr) {
        queryDbo = (BasicDBObject) com.mongodb.util.JSON.parse(findStr);
    } else {
        queryDbo = new BasicDBObject();
    } //TOTEST

    BasicDBObject fieldsDbo = new BasicDBObject();
    if (null != fields) {
        fieldsDbo = (BasicDBObject) com.mongodb.util.JSON.parse("{" + fields + "}");
    }

    //return the results:

    // Need to handle sorting...
    BasicDBObject sort = null;
    if (null != sortStr) { //override
        sort = (BasicDBObject) com.mongodb.util.JSON.parse(sortStr);
    } else { //defaults
        String sortField = "_id";
        int sortDir = 1;
        BasicDBObject postProcObject = (BasicDBObject) com.mongodb.util.JSON.parse(
                InfiniteHadoopUtils.getQueryOrProcessing(cmr.query, InfiniteHadoopUtils.QuerySpec.POSTPROC));
        if (postProcObject != null) {
            sortField = postProcObject.getString("sortField", "_id");
            sortDir = postProcObject.getInt("sortDirection", 1);
        } //TESTED (post proc and no post proc)
        sort = new BasicDBObject(sortField, sortDir);
    } //TOTEST

    // Case 1: DB
    rp.setResponse(new ResponseObject("Custom Map Reduce Job Results", true,
            "Map reduce job completed at: " + cmr.lastCompletionTime));
    if ((null == cmr.exportToHdfs) || !cmr.exportToHdfs) {
        DBCursor resultCursor = null;
        DBCollection coll = DbManager.getCollection(cmr.getOutputDatabase(), cmr.outputCollection);
        DBDecoderFactory defaultDecoder = coll.getDBDecoderFactory();
        CsvGeneratingBsonDecoder csvDecoder = null;
        SizeReportingBasicBSONDecoder sizeDecoder = null;
        CustomMapReduceResultPojo cmrr = new CustomMapReduceResultPojo();
        try {
            if (bCsv) {
                coll.setDBDecoderFactory((csvDecoder = new CsvGeneratingBsonDecoder()));
            } else {
                coll.setDBDecoderFactory((sizeDecoder = new SizeReportingBasicBSONDecoder()));
            }
            if (limit > 0) {
                resultCursor = coll.find(queryDbo, fieldsDbo).sort(sort).limit(limit);
            } else {
                resultCursor = coll.find(queryDbo, fieldsDbo).sort(sort);
            }
            LinkedList<BasicDBObject> list = null;
            if (!bCsv) {
                list = new LinkedList<BasicDBObject>();
            }
            final int MAX_SIZE_CSV = 80 * 1024 * 1024; //(80MB)
            final int MAX_SIZE_JSON = 80 * 1024 * 1024; //(80MB)
            while (resultCursor.hasNext()) {
                BasicDBObject x = (BasicDBObject) resultCursor.next();
                if (!bCsv) {
                    list.add(x);
                }
                if (null != csvDecoder) {
                    if (csvDecoder.getCsv().length() > MAX_SIZE_CSV) {
                        break;
                    }
                } else if (null != sizeDecoder) {
                    if (sizeDecoder.getSize() > MAX_SIZE_JSON) {
                        break;
                    }
                }
            }
            cmrr.results = list;
        } finally {
            coll.setDBDecoderFactory(defaultDecoder);
        }
        cmrr.lastCompletionTime = cmr.lastCompletionTime;
        if (null != csvDecoder) {
            StringBuffer header = new StringBuffer();
            for (String field : csvDecoder.getOrderedFields()) {
                if (0 != header.length()) {
                    header.append(',');
                }
                header.append('"');
                header.append(field.replace("\"", "\\\""));
                header.append("\"");
            }
            header.append('\n');
            header.append(csvDecoder.getCsv().toString());
            cmrr.results = header.toString();
        }
        rp.setData(cmrr);
    } //TESTED
    else { // Case 2: HDFS

        if ((null != cmr.outputKey) && (null != cmr.outputValue)
                && cmr.outputKey.equalsIgnoreCase("org.apache.hadoop.io.text")
                && cmr.outputValue.equalsIgnoreCase("org.apache.hadoop.io.text")) {
            // special case, text file
            try {
                rp.setData(HadoopUtils.getBsonFromTextFiles(cmr, limit, fields),
                        (BasePojoApiMap<BasicDBList>) null);
            } catch (Exception e) {
                rp.setResponse(new ResponseObject("Custom Map Reduce Job Results", false,
                        "Files don't appear to be in text file format, did you run the job before changing the output to Text/Text?"));
            }
        } //TESTED
        else { // sequence file
            try {
                rp.setData(HadoopUtils.getBsonFromSequenceFile(cmr, limit, fields),
                        (BasePojoApiMap<BasicDBList>) null);
            } catch (Exception e) {
                rp.setResponse(new ResponseObject("Custom Map Reduce Job Results", false,
                        "Files don't appear to be in sequence file format, did you run the job with Text/Text?"));
            }
        } //TESTED
    } //TESTED      
}
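
Above, the default sort is built by reading "sortField" and "sortDirection" from the optional post-processing object, falling back to _id ascending. A minimal standalone sketch of that lookup (the sample JSON is an assumption, not taken from a real job):

import com.mongodb.BasicDBObject;

public class GetStringSortExample {
    public static void main(String[] args) {
        // A post-processing spec that only overrides the sort field
        BasicDBObject postProcObject = (BasicDBObject) com.mongodb.util.JSON
                .parse("{ \"sortField\": \"value.count\" }");

        String sortField = postProcObject.getString("sortField", "_id"); // "value.count"
        int sortDir = postProcObject.getInt("sortDirection", 1);         // default: ascending

        BasicDBObject sort = new BasicDBObject(sortField, sortDir);
        System.out.println(sort); // prints the sort spec as JSON
    }
}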

From source file:com.ikanow.infinit.e.processing.custom.utils.SourcePipelineToCustomConversion.java

License:Apache License

public static void convertSourcePipeline(SourcePojo in, List<CustomMapReduceJobPojo> out,
        boolean testNotCreateMode) {
    BasicDBObject query = new BasicDBObject();
    BasicDBObject queryOutput = null; // (holds complex object)

    // Not sure if this will be string or JSON object..
    StringBuffer args = null;
    BasicDBObject argsJson = null;

    boolean haveInput = false;
    SourcePipelinePojo scorecard = new SourcePipelinePojo();

    List<String> caches = new LinkedList<String>();

    // Create a generic-ish set of fields for the job
    CustomMapReduceJobPojo job = handleInitializeOrGetJob(in, testNotCreateMode);

    // Now modify the fields based on the processing pipeline
    if (null != in.getProcessingPipeline())
        for (SourcePipelinePojo px : in.getProcessingPipeline()) {

            if (null != px.custom_datastoreQuery) {
                if (haveInput)
                    throw new RuntimeException("Currently only support one input block");
                haveInput = true;

                job.isCustomTable = true;

                job.inputCollection = px.custom_datastoreQuery.customTable;
                query = handleCommonInFields(px.custom_datastoreQuery.query, px.custom_datastoreQuery.fields,
                        px.custom_datastoreQuery.tmin, px.custom_datastoreQuery.tmax, null, null);
            } else if (null != px.custom_file) { // HDFS or Ikanow share
                if (haveInput)
                    throw new RuntimeException("Currently only support one input block");
                haveInput = true;

                SourcePojo temp = new SourcePojo();
                temp.setFileConfig(px.custom_file);
                BasicDBObject fileObj = (BasicDBObject) temp.toDb().get(SourcePojo.file_);
                query = new BasicDBObject(SourcePojo.file_, fileObj);

                String url = fileObj.getString("url", "will_error_later");

                if (url.startsWith("inf://share/")) {
                    job.inputCollection = "file.binary_shares";
                } else {
                    fileObj.put("url", url.replace("hdfs:///", "/").replace("hdfs:", "")); // (get rid of leading hdfs:)
                    job.inputCollection = "filesystem";
                }
            } else if (null != px.docs_datastoreQuery) {
                if (haveInput)
                    throw new RuntimeException("Currently only support one input block");
                haveInput = true;

                if (ContentMode.content == px.docs_datastoreQuery.contentMode) {
                    job.inputCollection = "doc_content.gzip_content";
                } else if ((null == px.docs_datastoreQuery.contentMode)
                        || (ContentMode.metadata == px.docs_datastoreQuery.contentMode)) {
                    job.inputCollection = "doc_metadata.metadata";
                } else {
                    throw new RuntimeException(
                            "Both content + metadata in the same job: not currently supported");
                }
                query = handleCommonInFields(px.docs_datastoreQuery.query, px.docs_datastoreQuery.fields,
                        px.docs_datastoreQuery.tmin, px.docs_datastoreQuery.tmax,
                        px.docs_datastoreQuery.srcTags, null);
            } else if (null != px.docs_documentQuery) {
                if (haveInput)
                    throw new RuntimeException("Currently only support one input block");
                haveInput = true;

                job.inputCollection = "doc_metadata.metadata";

                query = handleDocumentQuery(px.docs_documentQuery.query, in, job);
            } else if (null != px.records_indexQuery) {
                if (haveInput)
                    throw new RuntimeException("Currently only support one input block");
                haveInput = true;

                job.inputCollection = "records";
                query = handleCommonInFields(null, null, px.records_indexQuery.tmin, px.records_indexQuery.tmax,
                        null, new BasicDBObject());
                if (null != px.records_indexQuery.query) {
                    if (px.records_indexQuery.query.trim().startsWith("{")) {
                        query.put("query", com.mongodb.util.JSON.parse(px.records_indexQuery.query));
                    } else {
                        query.put("query", px.records_indexQuery.query);
                    }
                }
                if (null != px.records_indexQuery.filter) {
                    if (px.records_indexQuery.filter.trim().startsWith("{")) {
                        query.put("filter", com.mongodb.util.JSON.parse(px.records_indexQuery.filter));
                    } else {
                        query.put("filter", px.records_indexQuery.filter);
                    }
                }
                if (null != px.records_indexQuery.types) {
                    query.put("$types", px.records_indexQuery.types);
                }
                if (null != px.records_indexQuery.streamingMode) {
                    if (StreamingMode.stashed == px.records_indexQuery.streamingMode) {
                        query.put("$streaming", false);
                    } else if (StreamingMode.streaming == px.records_indexQuery.streamingMode) {
                        query.put("$streaming", true);
                    }
                    //(else don't set $streaming, defaults to both)
                }
                // (else don't set $streaming, defaults to both)
            } else if (null != px.feature_datastoreQuery) {
                if (haveInput)
                    throw new RuntimeException("Currently only support one input block");
                haveInput = true;

                if (FeatureName.association == px.feature_datastoreQuery.featureName) {
                    job.inputCollection = "feature.association";
                } else if (FeatureName.entity == px.feature_datastoreQuery.featureName) {
                    job.inputCollection = "feature.entity";
                } else if (FeatureName.temporal == px.feature_datastoreQuery.featureName) {
                    job.inputCollection = "feature.temporal";
                }
                query = handleCommonInFields(px.feature_datastoreQuery.query, px.feature_datastoreQuery.fields,
                        px.feature_datastoreQuery.tmin, px.feature_datastoreQuery.tmax, null, null);
            } else if (null != px.extraInputSettings) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                handleGroupOverride(px.extraInputSettings.groupOverrideList,
                        px.extraInputSettings.groupOverrideRegex, job, in);

                if (null != px.extraInputSettings.debugLimit) {
                    query.put("$limit", px.extraInputSettings.debugLimit);
                }
                if (null != px.extraInputSettings.docsPerSplitOverride) {
                    query.put("$docsPerSplit", px.extraInputSettings.docsPerSplitOverride);
                }
                if (null != px.extraInputSettings.numSplitsOverride) {
                    query.put("$splits", px.extraInputSettings.numSplitsOverride);
                }
            } else if (null != px.scheduler) {
                if (null != scorecard.scheduler)
                    throw new RuntimeException("Only support one scheduler");
                scorecard.scheduler = px.scheduler;

                boolean isDisabled = false;
                if (null == px.scheduler.frequency) {
                    px.scheduler.frequency = CustomScheduler.FrequencyMode.disabled;
                }
                if (CustomScheduler.FrequencyMode.once_only == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.NONE;
                } else if (CustomScheduler.FrequencyMode.hourly == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.HOURLY;
                } else if (CustomScheduler.FrequencyMode.daily == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.DAILY;
                } else if (CustomScheduler.FrequencyMode.weekly == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.WEEKLY;
                } else if (CustomScheduler.FrequencyMode.monthly == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.MONTHLY;
                } else if (CustomScheduler.FrequencyMode.disabled == px.scheduler.frequency) {
                    isDisabled = true;
                    job.scheduleFreq = SCHEDULE_FREQUENCY.NONE;
                    job.nextRunTime = CustomApiUtils.DONT_RUN_TIME;
                } else if (CustomScheduler.FrequencyMode.ondemand == px.scheduler.frequency) {
                    isDisabled = true;
                    job.nextRunTime = CustomApiUtils.DONT_RUN_TIME; //01-01-2099 in milliseconds! Will use this constant to mean "don't run" - CustomHandler.DONT_RUN_TIME

                    //TODO (INF-2865): to implement
                    throw new RuntimeException("'OnDemand' not yet supported");
                }

                if (!isDisabled) {
                    if (null != scorecard.scheduler.runDate) {
                        Date d = InfiniteHadoopUtils.dateStringFromObject(scorecard.scheduler.runDate, true);
                        if (null != d) {
                            // Special case: if once_only and runDate < now then update it
                            if (CustomScheduler.FrequencyMode.once_only == px.scheduler.frequency) {
                                long now = new Date().getTime();
                                if (d.getTime() < now) {
                                    job.nextRunTime = now;
                                } else {
                                    job.nextRunTime = d.getTime();
                                }
                            } else {
                                // (otherwise retain it so that it gets used to determine the next time)
                                job.nextRunTime = d.getTime();
                            }
                        }
                    } else if (Long.MAX_VALUE == job.nextRunTime) { // (ie not set => field left at its default)
                        job.nextRunTime = new Date().getTime();
                    }
                    if ((null == job.firstSchedule)
                            || (CustomApiUtils.DONT_RUN_TIME == job.firstSchedule.getTime())) {
                        // (ie if firstSchedule not set then set it)
                        job.firstSchedule = new Date(job.nextRunTime);
                    }
                } //(else already set)

                if (null != scorecard.scheduler.autoDependency) {
                    //(will eventually automatically generate a dependency on any custom input tables)
                    //TODO (INF-2865): to implement
                    throw new RuntimeException("'Automatic dependencies' not yet supported");
                }

                if (null != scorecard.scheduler.dependencies) {
                    try {
                        job.jobDependencies = new HashSet<ObjectId>(scorecard.scheduler.dependencies.size());
                        for (String depId : scorecard.scheduler.dependencies) {
                            job.jobDependencies.add(new ObjectId(depId));
                        }
                    } catch (Exception e) {
                        throw new RuntimeException("Custom Scheduler Dependencies: invalid Dependency in "
                                + Arrays.toString(scorecard.scheduler.dependencies.toArray()));
                    }
                }

                // First time through, can overwrite some of the fields: 
                if ((null == in.getHarvestStatus()) || (null == in.getHarvestStatus().getHarvest_status())) {
                    job.timesRan = 0; // (if we're setting the initial override, then need to ensure that it's unset after running)
                    job.timesFailed = 0;

                    // Unset any tmin/tmax/srctags fields if set to " "s
                    String tminOver = px.scheduler.tmin_initialOverride;
                    String tmaxOver = px.scheduler.tmax_initialOverride;
                    String srctagsOver = px.scheduler.srcTags_initialOverride;
                    if (null != tminOver) {
                        tminOver = tminOver.trim(); // (hence will be ignored)
                        if (tminOver.isEmpty()) {
                            query.remove("$tmin");
                        }
                    }
                    if (null != tmaxOver) {
                        tmaxOver = tmaxOver.trim();
                        if (tmaxOver.isEmpty()) {
                            query.remove("$tmax");
                        }
                    }
                    if (null != srctagsOver) {
                        srctagsOver = srctagsOver.trim();
                        if (srctagsOver.isEmpty()) {
                            query.remove("$srctags");
                        }
                    } //TESTED (custom_scheduler_test_2, custom_scheduler_test_1)

                    if (null == px.scheduler.query_initialOverride) { // easy, just override fields from existing query
                        query = handleCommonInFields(null, null, tminOver, tmaxOver, srctagsOver, query);
                    } //TESTED (custom_scheduler_test_1)
                    else { // one extra complication ... if tmin/tmax/srctags _aren't_ overridden then use originals instead
                        if (null == tminOver)
                            tminOver = query.getString("$tmin");
                        if (null == tmaxOver)
                            tmaxOver = query.getString("$tmax");
                        if (null == srctagsOver)
                            srctagsOver = query.getString("$srctags");
                        query = handleCommonInFields(px.scheduler.query_initialOverride, null, tminOver,
                                tmaxOver, srctagsOver, null);
                    } //TESTED (custom_scheduler_test_2 - some fields override (+ve or -ve), some pulled from original)
                }
                //TESTED (that first time through harvest|harvest.status==null, subsequently not)
            } else if (null != px.artefacts) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                if (null != px.artefacts.mainJar) {
                    String jar = null;
                    // A few options:
                    // $infinite/.../<id> or <id> or a URL
                    try {
                        jar = new ObjectId(px.artefacts.mainJar).toString();
                        jar = "$infinite/share/get/" + jar;
                    } catch (Exception e) {
                    } // fall through to...

                    if (null == jar) {
                        jar = px.artefacts.mainJar;
                    }
                    job.jarURL = jar;
                }
                if (null != px.artefacts.extraJars) {
                    for (String jarId : px.artefacts.extraJars) {
                        caches.add(jarId);
                    }
                }
                if (null != px.artefacts.joinTables) {
                    for (String shareId : px.artefacts.joinTables) {
                        caches.add(shareId);
                    }
                }
                if (null != px.artefacts.selfJoin) {
                    job.selfMerge = px.artefacts.selfJoin;
                }
            } else if (null != px.mapper) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                if (null != scorecard.scriptingEngine)
                    throw new RuntimeException("Can't have a scriptingEngine and mapper");
                if (null != scorecard.hadoopEngine)
                    throw new RuntimeException("Can't have a hadoopEngine and mapper");
                if (null != scorecard.mapper)
                    throw new RuntimeException("Currently only support one mapper");
                scorecard.mapper = px.mapper;

                job.mapper = px.mapper.mapperClass;

                if (null != px.mapper.mapperKeyClass) {
                    query.put("$mapper_key_class", px.mapper.mapperKeyClass);
                }
                if (null != px.mapper.mapperValueClass) {
                    query.put("$mapper_value_class", px.mapper.mapperValueClass);
                }
            } else if (null != px.combiner) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                if (null != scorecard.scriptingEngine)
                    throw new RuntimeException("Can't have a scriptingEngine and combiner");
                if (null != scorecard.hadoopEngine)
                    throw new RuntimeException("Can't have a hadoopEngine and combiner");
                if (null != scorecard.combiner)
                    throw new RuntimeException("Currently only support one combiner");
                scorecard.combiner = px.combiner;

                job.combiner = px.combiner.combinerClass;
            } else if (null != px.reducer) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                if (null != scorecard.scriptingEngine)
                    throw new RuntimeException("Can't have a scriptingEngine and reducer");
                if (null != scorecard.hadoopEngine)
                    throw new RuntimeException("Can't have a hadoopEngine and reducer");
                if (null != scorecard.reducer)
                    throw new RuntimeException("Currently only support one reducer");
                scorecard.reducer = px.reducer;

                job.reducer = px.reducer.reducerClass;

                if (null != px.reducer.numReducers) {
                    query.put("$reducers", px.reducer.numReducers);
                }
                if (null != px.reducer.outputKeyClass) {
                    job.outputKey = px.reducer.outputKeyClass;
                }
                if (null != px.reducer.outputValueClass) {
                    job.outputValue = px.reducer.outputValueClass;
                }
            } else if (null != px.hadoopEngine) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                if (null != scorecard.scriptingEngine)
                    throw new RuntimeException("Only one of: scriptingEngine, hadoopEngine");
                if (null != scorecard.hadoopEngine)
                    throw new RuntimeException("Only support one hadoopEngine");
                if (null != scorecard.mapper)
                    throw new RuntimeException("Can't have a hadoopEngine and mapper");
                if (null != scorecard.combiner)
                    throw new RuntimeException("Can't have a hadoopEngine and combiner");
                if (null != scorecard.reducer)
                    throw new RuntimeException("Can't have a hadoopEngine and reducer");
                scorecard.hadoopEngine = px.hadoopEngine;

                if (null != px.hadoopEngine.mainJar) {
                    String jar = null;
                    // A few options:
                    // $infinite/.../<id> or <id> or a URL
                    try {
                        jar = new ObjectId(px.hadoopEngine.mainJar).toString();
                        jar = "$infinite/share/get/" + jar;
                    } catch (Exception e) {
                    } // fall through to...

                    if (null == jar) {
                        jar = px.hadoopEngine.mainJar;
                    }
                    job.jarURL = jar;
                }

                job.mapper = px.hadoopEngine.mapperClass;
                if (null != px.hadoopEngine.combinerClass) {
                    job.combiner = px.hadoopEngine.combinerClass;
                } else {
                    job.combiner = "none";
                }
                if (null != px.hadoopEngine.reducerClass) {
                    job.reducer = px.hadoopEngine.reducerClass;
                } else {
                    job.reducer = "none";
                }
                job.outputKey = px.hadoopEngine.outputKeyClass;
                job.outputValue = px.hadoopEngine.outputValueClass;

                if (null != px.hadoopEngine.mapperKeyClass) {
                    query.put("$mapper_key_class", px.hadoopEngine.mapperKeyClass);
                }
                if (null != px.hadoopEngine.mapperValueClass) {
                    query.put("$mapper_value_class", px.hadoopEngine.mapperValueClass);
                }
                if (null != px.hadoopEngine.numReducers) {
                    query.put("$reducers", px.hadoopEngine.numReducers);
                }

                if (null != px.hadoopEngine.configuration) {
                    if (px.hadoopEngine.configuration.trim().startsWith("{")) {
                        argsJson = (BasicDBObject) com.mongodb.util.JSON.parse(px.hadoopEngine.configuration);
                        if (null != px.hadoopEngine.configParams)
                            for (Map.Entry<String, String> param : px.hadoopEngine.configParams.entrySet()) {
                                argsJson.put(param.getKey(), param.getValue());
                            }
                    } else {
                        args = new StringBuffer(px.hadoopEngine.configuration);
                        if (null != px.hadoopEngine.configParams) {
                            throw new RuntimeException(
                                    "Can only specify hadoopEngine.configParams when hadoopEngine.configuration is in JSON format");
                        }
                    }
                } else {
                    args = new StringBuffer(); // (ie just "")
                }
            } else if (null != px.scriptingEngine) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                if (null != scorecard.hadoopEngine)
                    throw new RuntimeException("Only one of: scriptingEngine, hadoopEngine");
                if (null != scorecard.scriptingEngine)
                    throw new RuntimeException("Only support one scriptingEngine");
                if (null != scorecard.mapper)
                    throw new RuntimeException("Can't have a scriptingEngine and mapper");
                if (null != scorecard.combiner)
                    throw new RuntimeException("Can't have a scriptingEngine and combiner");
                if (null != scorecard.reducer)
                    throw new RuntimeException("Can't have a scriptingEngine and reducer");
                scorecard.scriptingEngine = px.scriptingEngine;

                //TODO (INF-2865): handle jython scripting engine (mainJar and also the classes below)
                job.jarURL = InfiniteHadoopUtils.BUILT_IN_JOB_PATH;

                args = new StringBuffer();

                if (null != px.scriptingEngine.numReducers) {
                    query.put("$reducers", px.scriptingEngine.numReducers);
                }

                if (null != px.scriptingEngine.memoryOptimized) {
                    args.append("_memoryOptimization = ").append(px.scriptingEngine.memoryOptimized)
                            .append(";\n\n");
                }
                if ((null != px.scriptingEngine.globalScript) && !px.scriptingEngine.globalScript.isEmpty()) {
                    args.append(px.scriptingEngine.globalScript).append("\n\n");
                }

                job.mapper = "com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptMapper";
                if ((null != px.scriptingEngine.mapScript) && !px.scriptingEngine.mapScript.isEmpty()) {
                    args.append(px.scriptingEngine.mapScript).append("\n\n");
                }
                if ((null != px.scriptingEngine.combineScript) && !px.scriptingEngine.combineScript.isEmpty()) {
                    job.combiner = "com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptCombiner";
                    args.append(px.scriptingEngine.combineScript).append("\n\n");
                } else {
                    job.combiner = "#com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptCombiner";
                }
                if ((null != px.scriptingEngine.reduceScript) && !px.scriptingEngine.reduceScript.isEmpty()) {
                    job.reducer = "com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptReducer";
                    args.append(px.scriptingEngine.reduceScript).append("\n\n");
                } else {
                    job.reducer = "#com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptReducer";
                }
                job.outputKey = "com.mongodb.hadoop.io.BSONWritable";
                job.outputValue = "com.mongodb.hadoop.io.BSONWritable";
            } else if (null != px.tableOutput) {
                if (!haveInput)
                    throw new RuntimeException("Job must start with an input block");

                if (null != scorecard.tableOutput)
                    throw new RuntimeException("Only support one tableOutput");
                scorecard.tableOutput = px.tableOutput;

                if (null != px.tableOutput.ageOut_days) {
                    job.appendAgeOutInDays = px.tableOutput.ageOut_days;
                }
                if (null != px.tableOutput.globalObjectLimit) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("limit", px.tableOutput.globalObjectLimit);
                    queryOutput.put("limitAllData", true);
                }
                if (null != px.tableOutput.perCycleObjectLimit) {
                    if (null != px.tableOutput.globalObjectLimit) {
                        throw new RuntimeException(
                                "Currently can support only one of: globalObjectLimit, perCycleObjectLimit in tableOutput");
                    }

                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("limit", px.tableOutput.globalObjectLimit);
                    queryOutput.put("limitAllData", false);
                }
                if (null != px.tableOutput.sortDirection) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("sortDirection", px.tableOutput.sortDirection);
                }
                if (null != px.tableOutput.sortField) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("sortField", px.tableOutput.sortField);
                }
                if (null != px.tableOutput.appendMode) {
                    if (AppendMode.append_merge == px.tableOutput.appendMode) {
                        job.appendResults = true;
                        job.incrementalMode = false;
                    } else if (AppendMode.append_reduce == px.tableOutput.appendMode) {
                        job.appendResults = true;
                        job.incrementalMode = true;
                    }
                    //(else leave alone)
                }
                if (null != px.tableOutput.dataStoreIndexes) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("indexed", com.mongodb.util.JSON.parse(px.tableOutput.dataStoreIndexes));
                }
                if (!testNotCreateMode) {
                    if (null != px.tableOutput.indexed) {
                        if (px.tableOutput.indexed) {
                            if (null == queryOutput) {
                                queryOutput = new BasicDBObject();
                                query.put("$output", queryOutput);
                            }
                            queryOutput.put("indexMode", "custom");
                        }
                    }
                }
                if (null != px.tableOutput.postFixName) {
                    throw new RuntimeException(
                            "Can't currently specify a postFix for job names - job name == source key");
                }
            }
            //(don't allow any other output types in test mode?)

        } //(end loop over pipeline elements)

    completeJob(job, query, caches, (null != args) ? args.toString() : null, argsJson, scorecard);
    out.add(job);
}
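
In the custom_file branch above, getString("url", "will_error_later") supplies a sentinel default so the prefix checks never dereference null. A minimal standalone sketch of that branch (the sample HDFS path is illustrative):

import com.mongodb.BasicDBObject;

public class GetStringUrlExample {
    public static void main(String[] args) {
        BasicDBObject fileObj = new BasicDBObject("url", "hdfs:///user/tomcat/data/");

        // Sentinel default keeps the prefix checks null-safe even if "url" is missing
        String url = fileObj.getString("url", "will_error_later");

        String inputCollection;
        if (url.startsWith("inf://share/")) {
            inputCollection = "file.binary_shares";
        } else {
            // Strip the hdfs: scheme so the path can be used directly
            fileObj.put("url", url.replace("hdfs:///", "/").replace("hdfs:", ""));
            inputCollection = "filesystem";
        }
        System.out.println(inputCollection + ": " + fileObj.getString("url", ""));
    }
}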