List of usage examples for com.mongodb.BasicDBObject#getString(String key, String def)

public String getString(final String key, final String def)

Returns the value stored under key converted to a String (via toString()), or def when the key is absent or its value is null.
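A minimal, self-contained sketch of the behaviour described above, assuming only the legacy MongoDB Java driver (which provides com.mongodb.BasicDBObject) on the classpath; the document contents are made up for illustration:

import com.mongodb.BasicDBObject;

public class GetStringDemo {
    public static void main(String[] args) {
        BasicDBObject doc = new BasicDBObject("firstName", "Ada")
                .append("retries", 3)         // non-String values are rendered via toString()
                .append("middleName", null);  // an explicit null behaves like a missing key

        System.out.println(doc.getString("firstName", ""));   // "Ada"
        System.out.println(doc.getString("retries", "0"));    // "3"
        System.out.println(doc.getString("middleName", "-")); // "-" (null value -> default)
        System.out.println(doc.getString("lastName", "-"));   // "-" (missing key -> default)
        System.out.println(doc.getString("lastName"));        // null (one-argument overload)
    }
}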
From source file:com.epam.dlab.auth.dao.UserInfoDAOMongoImpl.java
License:Apache License
@Override
public UserInfo getUserInfoByAccessToken(String accessToken) {
    BasicDBObject uiSearchDoc = new BasicDBObject();
    uiSearchDoc.put("_id", accessToken);
    MongoCollection<BasicDBObject> mc = ms.getCollection("security", BasicDBObject.class);
    FindIterable<BasicDBObject> res = mc.find(uiSearchDoc);
    BasicDBObject uiDoc = res.first();
    if (uiDoc == null) {
        log.warn("UI not found {}", accessToken);
        return null;
    }
    Date lastAccess = uiDoc.getDate("expireAt");
    if (inactiveUserTimeoutMsec < Math.abs(new Date().getTime() - lastAccess.getTime())) {
        log.warn("UI for {} expired but were not evicted from DB. Contact MongoDB admin to create expireable "
                + "index" + " on 'expireAt' key.", accessToken);
        this.deleteUserInfo(accessToken);
        return null;
    }
    String name = uiDoc.get("name").toString();
    String firstName = uiDoc.getString("firstName", "");
    String lastName = uiDoc.getString("lastName", "");
    String remoteIp = uiDoc.getString("remoteIp", "");
    BasicDBList roles = (BasicDBList) uiDoc.get("roles");
    Boolean awsUser = uiDoc.getBoolean("awsUser", false);
    UserInfo ui = new UserInfo(name, accessToken);
    ui.setFirstName(firstName);
    ui.setLastName(lastName);
    ui.setRemoteIp(remoteIp);
    ui.setAwsUser(awsUser);
    Object awsKeys = uiDoc.get("awsKeys");
    if (awsKeys != null) {
        ((BasicDBObject) awsKeys).forEach((key, val) -> ui.addKey(key, val.toString()));
    }
    roles.forEach(o -> ui.addRole("" + o));
    log.debug("Found persistent {}", ui);
    return ui;
}
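One detail worth noting in the method above: uiDoc.get("name").toString() throws a NullPointerException if "name" is missing, while the two-argument getString calls degrade gracefully. A hedged sketch of the safer pattern (the field names are illustrative, not taken from the DAO):

import com.mongodb.BasicDBObject;

public class SafeFieldRead {
    public static void main(String[] args) {
        BasicDBObject uiDoc = new BasicDBObject("firstName", "Grace"); // "name" deliberately absent

        // Risky: get() returns null for a missing key, so calling toString() here would throw.
        // String name = uiDoc.get("name").toString();

        // Safer: fall back to a sentinel and handle the absence explicitly.
        String name = uiDoc.getString("name", "");
        if (name.isEmpty()) {
            System.out.println("document has no 'name' field");
        } else {
            System.out.println("name = " + name);
        }
    }
}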
From source file:com.ikanow.aleph2.v1.document_db.utils.LegacyV1HadoopUtils.java
License:Open Source License
/** parse the V1 query string * @param query//from w w w . j a va 2s . com * @return the required objects embedded in various tuples */ public static Tuple4<String, Tuple2<Integer, Integer>, BasicDBObject, DBObject> parseQueryObject( final String query, final List<String> community_ids) { // Some fixed variables just to avoid changing the guts of the (tested in v1) code final boolean isCustomTable = false; @SuppressWarnings("unused") Integer nDebugLimit = null; final boolean bLocalMode = false; @SuppressWarnings("unused") final Boolean incrementalMode = null; final String input = "doc_metadata.metadata"; // Output objects final String out_query; int nSplits = 8; int nDocsPerSplit = 12500; List<ObjectId> communityIds = community_ids.stream().map(s -> new ObjectId(s)).collect(Collectors.toList()); //C/P code: //add communities to query if this is not a custom table BasicDBObject oldQueryObj = null; BasicDBObject srcTags = null; // Start with the old query: if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else { oldQueryObj = new BasicDBObject(); } boolean elasticsearchQuery = oldQueryObj.containsField("qt") && !isCustomTable; @SuppressWarnings("unused") int nLimit = 0; if (oldQueryObj.containsField(":limit")) { nLimit = oldQueryObj.getInt(":limit"); oldQueryObj.remove(":limit"); } if (oldQueryObj.containsField(":splits")) { nSplits = oldQueryObj.getInt(":splits"); oldQueryObj.remove(":splits"); } if (oldQueryObj.containsField(":srctags")) { srcTags = new BasicDBObject(SourcePojo.tags_, oldQueryObj.get(":srctags")); oldQueryObj.remove(":srctags"); } if (bLocalMode) { // If in local mode, then set this to a large number so we always run inside our limit/split version // (since for some reason MongoInputFormat seems to fail on large collections) nSplits = InfiniteMongoSplitter.MAX_SPLITS; } if (oldQueryObj.containsField(":docsPerSplit")) { nDocsPerSplit = oldQueryObj.getInt(":docsPerSplit"); oldQueryObj.remove(":docsPerSplit"); } final DBObject fields = (DBObject) oldQueryObj.remove(":fields"); oldQueryObj.remove(":output"); oldQueryObj.remove(":reducers"); @SuppressWarnings("unused") String mapperKeyClass = oldQueryObj.getString(":mapper_key_class", ""); @SuppressWarnings("unused") String mapperValueClass = oldQueryObj.getString(":mapper_value_class", ""); oldQueryObj.remove(":mapper_key_class"); oldQueryObj.remove(":mapper_value_class"); String cacheList = null; Object cacheObj = oldQueryObj.get(":caches"); if (null != cacheObj) { cacheList = cacheObj.toString(); // (either array of strings, or single string) if (!cacheList.startsWith("[")) { cacheList = "[" + cacheList + "]"; // ("must" now be valid array) } oldQueryObj.remove(":caches"); } //TESTED // if (null != nDebugLimit) { // (debug mode override) // nLimit = nDebugLimit; // } // boolean tmpIncMode = ( null != incrementalMode) && incrementalMode; @SuppressWarnings("unused") String otherCollections = null; Date fromOverride = null; Date toOverride = null; Object fromOverrideObj = oldQueryObj.remove(":tmin"); Object toOverrideObj = oldQueryObj.remove(":tmax"); if (null != fromOverrideObj) { fromOverride = dateStringFromObject(fromOverrideObj, true); } if (null != toOverrideObj) { toOverride = dateStringFromObject(toOverrideObj, false); } if (!isCustomTable) { if (elasticsearchQuery) { oldQueryObj.put("communityIds", communityIds); //tmin/tmax not supported - already have that capability as part of the query } else { if (input.equals("feature.temporal")) { if ((null != fromOverride) || (null 
!= toOverride)) { oldQueryObj.put("value.maxTime", createDateRange(fromOverride, toOverride, true)); } //TESTED oldQueryObj.put("_id.c", new BasicDBObject(DbManager.in_, communityIds)); } else { oldQueryObj.put(DocumentPojo.communityId_, new BasicDBObject(DbManager.in_, communityIds)); if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put(JsonUtils._ID, createDateRange(fromOverride, toOverride, false)); } //TESTED if (input.equals("doc_metadata.metadata")) { oldQueryObj.put(DocumentPojo.index_, new BasicDBObject(DbManager.ne_, "?DEL?")); // (ensures not soft-deleted) } } } } else { throw new RuntimeException("Custom Tables not currently supported (no plans to)"); // if ((null != fromOverride) || (null != toOverride)) { // oldQueryObj.put(JsonUtils._ID, createDateRange(fromOverride, toOverride, false)); // }//TESTED // //get the custom table (and database) // // String[] candidateInputs = input.split("\\s*,\\s*"); // input = CustomOutputManager.getCustomDbAndCollection(candidateInputs[0]); // if (candidateInputs.length > 1) { // otherCollections = Arrays.stream(candidateInputs) // .skip(1L) // .map(i -> CustomOutputManager.getCustomDbAndCollection(i)) // .map(i -> "mongodb://"+dbserver+"/"+i).collect(Collectors.joining("|")); // } } out_query = oldQueryObj.toString(); return Tuples._4T(out_query, Tuples._2T(nSplits, nDocsPerSplit), srcTags, fields); }
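The parser above uses empty-string defaults to read optional ':'-prefixed control parameters out of the user-supplied query before stripping them back out. A reduced sketch of that pattern, assuming the legacy driver's com.mongodb.util.JSON parser; the inlined query string is illustrative:

import com.mongodb.BasicDBObject;

public class QueryOverrides {
    public static void main(String[] args) {
        // In the harvester above the query arrives as a JSON string; here it is inlined for brevity.
        BasicDBObject queryObj = (BasicDBObject) com.mongodb.util.JSON
                .parse("{\":mapper_key_class\":\"org.apache.hadoop.io.Text\", \":splits\": 4}");

        // Optional overrides default to "" when the caller did not supply them.
        String mapperKeyClass = queryObj.getString(":mapper_key_class", "");
        String mapperValueClass = queryObj.getString(":mapper_value_class", "");

        System.out.println("key class override:   " + mapperKeyClass);   // org.apache.hadoop.io.Text
        System.out.println("value class override: " + mapperValueClass); // "" (not supplied)

        // The control keys are then removed so they do not leak into the real MongoDB query.
        queryObj.remove(":mapper_key_class");
        queryObj.remove(":mapper_value_class");
    }
}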
From source file:com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine.java
License:Open Source License
@Override public void postQueryActivities(ObjectId queryId, List<BasicDBObject> docs, ResponsePojo response) { boolean grabbedScores = false; double aggregateSignif = 100.0; double queryRelevance = 100.0; double score = 100.0; if (null != _asyncRequestsPerQuery) { int added = 0; BasicDBList bsonArray = new BasicDBList(); PeekingIterator<FederatedRequest> it = Iterators.peekingIterator(_asyncRequestsPerQuery.iterator()); while (it.hasNext()) { // loop state: BasicDBObject[] docOrDocs = new BasicDBObject[1]; docOrDocs[0] = null;/*from w ww. j a v a 2s. c om*/ FederatedRequest request = it.next(); boolean isComplexSource = isComplexSource(request.endpointInfo.parentSource); if (null == request.cachedDoc) { // no cached doc, simple source processing (OR ANY COMPLEX CASE BY CONSTRUCTION) try { if ((null == request.cachedResult) || isComplexSource) { // no cached api response, or complex if (null != request.importThread) { // 1) wait for the thread to finish if (null == request.endpointInfo.queryTimeout_secs) { request.endpointInfo.queryTimeout_secs = 300; } for (int timer = 0; timer < request.endpointInfo.queryTimeout_secs; timer++) { try { request.importThread.join(1000L); if (!request.importThread.isAlive()) { break; } } //TESTED (by hand) catch (Exception e) { //(carry on) } } if (request.importThread.isAlive()) { request.errorMessage = new RuntimeException("Script timed out"); } //TESTED (by hand) // 2) Get the results if (null != request.errorMessage) { if (_testMode) { throw new RuntimeException(request.errorMessage); } } else if (isComplexSource) { //DEBUG if (_DEBUG) _logger.debug("DEB: postQA0: " + request.complexSourceProcResults.size()); handleComplexDocCaching(request, _cacheMode, _scoreStats); // Get a list of docs docOrDocs = ((BasicDBList) DocumentPojo .listToDb(request.complexSourceProcResults, DocumentPojo.listType())) .toArray(new BasicDBObject[0]); // (_API_ caching is exactly the same between cache and non-cache cases) // (note that if null != complexSourceProcResults then follows that null != scriptResult) String url = buildScriptUrl(request.mergeKey, request.queryIndex); if (!(request.importThread instanceof FederatedSimpleHarvest) && _cacheMode) { // (don't cache python federated queries in test mode) // (simple harvest caching is done separately) this.cacheApiResponse(url, request.scriptResult, request.endpointInfo); } } //TESTED (by hand - single and multiple doc mode) else if (null == request.scriptResult) { if (_testMode) { throw new RuntimeException("Script mode: no cached result found from: " + request.requestParameter); } } else { // (_API_ caching is exactly the same between cache and non-cache cases) String url = buildScriptUrl(request.mergeKey, request.queryIndex); if (_cacheMode) { // (don't cache python federated queries in test mode) this.cacheApiResponse(url, request.scriptResult, request.endpointInfo); } bsonArray.add(request.scriptResult); } } // end script mode else { // HTTP mode (also: must be simple source builder) Response endpointResponse = request.responseFuture.get(); request.asyncClient.close(); request.asyncClient = null; String jsonStr = endpointResponse.getResponseBody(); String url = endpointResponse.getUri().toURL().toString(); Object bsonUnknownType = com.mongodb.util.JSON.parse(jsonStr); BasicDBObject bson = null; if (bsonUnknownType instanceof BasicDBObject) { bson = (BasicDBObject) bsonUnknownType; } else if (bsonUnknownType instanceof BasicDBList) { bson = new BasicDBObject(SimpleFederatedCache.array_, bsonUnknownType); } else if 
(bsonUnknownType instanceof String) { bson = new BasicDBObject(SimpleFederatedCache.value_, bsonUnknownType); } //DEBUG if (_DEBUG) _logger.debug("DEB: postQA1: " + url + ": " + jsonStr); if (null != bson) { MongoDbUtil.enforceTypeNamingPolicy(bson, 0); this.cacheApiResponse(url, bson, request.endpointInfo); bsonArray.add(bson); } } //(end script vs request method) } //TESTED (3.1, 4.2) else { // (just used cached value) //DEBUG if (_DEBUG) _logger.debug("DEB: postQA2: " + request.cachedResult.toString()); bsonArray.add( (BasicDBObject) request.cachedResult.get(SimpleFederatedCache.cachedJson_)); } //TESTED (4.1, 4.3) } catch (Exception e) { //DEBUG if (null == request.subRequest) { _logger.error("Error with script: " + e.getMessage()); if (_testMode) { throw new RuntimeException("Error with script: " + e.getMessage(), e); } } else { _logger.error("Error with " + request.subRequest.endPointUrl + ": " + e.getMessage()); if (_testMode) { throw new RuntimeException( "Error with " + request.subRequest.endPointUrl + ": " + e.getMessage(), e); } } } if (null == docOrDocs[0]) { // (this next bit of logic can only occur in simple source cases by construction, phew) if (!it.hasNext() || (request.mergeKey != it.peek().mergeKey)) { // deliberate ptr arithmetic String url = buildScriptUrl(request.mergeKey, request.queryIndex); //DEBUG if (_DEBUG) _logger.debug("DEB: postQA3: " + url + ": " + bsonArray); docOrDocs[0] = createDocFromJson(bsonArray, url, request, request.endpointInfo); } } } // (end if no cached doc) else { // cached doc, bypass lots of processing because no merging and doc already built (simple source processing) docOrDocs[0] = request.cachedDoc; } //TESTED (by hand) if (null != docOrDocs[0]) for (BasicDBObject doc : docOrDocs) { // Cache the document unless already cached (or caching disabled) if ((null == request.cachedDoc) && _cacheMode && !isComplexSource && ((null == request.endpointInfo.cacheTime_days) || (request.endpointInfo.cacheTime_days >= 0))) { simpleDocCache(request, doc); } //TESTED (by hand, 3 cases: cached not expired, cached expired first time, cached expired multiple times) if (!grabbedScores) { if (!docs.isEmpty()) { BasicDBObject topDoc = docs.get(0); aggregateSignif = topDoc.getDouble(DocumentPojo.aggregateSignif_, aggregateSignif); queryRelevance = topDoc.getDouble(DocumentPojo.queryRelevance_, queryRelevance); score = topDoc.getDouble(DocumentPojo.score_, score); grabbedScores = true; // OK would also like to grab the original matching entity, if it exists if (!isComplexSource) { BasicDBList ents = (BasicDBList) topDoc.get(DocumentPojo.entities_); if (null != ents) { for (Object entObj : ents) { BasicDBObject ent = (BasicDBObject) entObj; String entIndex = ent.getString(EntityPojo.index_, ""); if (entIndex.equals(request.queryIndex)) { ents = (BasicDBList) doc.get(DocumentPojo.entities_); if (null != ents) { ents.add(ent); } break; } } } //TESTED (by hand) } } } doc.put(DocumentPojo.aggregateSignif_, aggregateSignif); doc.put(DocumentPojo.queryRelevance_, queryRelevance); doc.put(DocumentPojo.score_, score); // Swap id and updateId, everything's been cached now: // Handle update ids vs normal ids: ObjectId updateId = (ObjectId) doc.get(DocumentPojo.updateId_); if (null != updateId) { // swap the 2... 
doc.put(DocumentPojo.updateId_, doc.get(DocumentPojo._id_)); doc.put(DocumentPojo._id_, updateId); } //TESTED (by hand) // If we're returning to a query then we'll adjust the doc format (some of the atomic fields become arrays) if (!_testMode) { convertDocToQueryFormat(doc, request.communityIdStrs); } //TESTED (by hand) docs.add(0, doc); added++; //(doc auto reset at top of loop) //(end if built a doc from the last request/set of requests) } //TESTED (3.1) } //(end loop over federated requests) if (null != response.getStats()) { response.getStats().found += added; } //TESTED (by hand) } }
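In the entity-matching loop above, the empty-string default keeps the equals() comparison null-safe even for entities that lack an index field. A stripped-down sketch (field names and values are illustrative):

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class EntityMatch {
    public static void main(String[] args) {
        BasicDBList ents = new BasicDBList();
        ents.add(new BasicDBObject("index", "acme inc/company"));
        ents.add(new BasicDBObject("actual_name", "no index field here"));

        String queryIndex = "acme inc/company";
        for (Object entObj : ents) {
            BasicDBObject ent = (BasicDBObject) entObj;
            // "" as the default keeps equals() null-safe even when the field is missing.
            if (ent.getString("index", "").equals(queryIndex)) {
                System.out.println("matched entity: " + ent);
            }
        }
    }
}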
From source file:com.ikanow.infinit.e.core.utils.SourceUtils.java
License:Open Source License
private static boolean updateHarvestDistributionState_tokenComplete(SourcePojo source, HarvestEnum harvestStatus, BasicDBObject incClause, BasicDBObject setClause) { // Update tokens complete, and retrieve modified version int nTokensToBeCleared = source.getDistributionTokens().size(); BasicDBObject query = new BasicDBObject(SourcePojo._id_, source.getId()); BasicDBObject modify = new BasicDBObject(MongoDbManager.inc_, new BasicDBObject( SourceHarvestStatusPojo.sourceQuery_distributionTokensComplete_, nTokensToBeCleared)); BasicDBObject fields = new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_distributionTokensComplete_, 1);// w ww. j a v a 2 s . com fields.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_, 1); fields.put(SourceHarvestStatusPojo.sourceQuery_distributionReachedLimit_, 1); BasicDBObject partial = (BasicDBObject) MongoDbManager.getIngest().getSource().findAndModify(query, fields, null, false, modify, true, false); //(return new version - ensures previous increments have been taken into account) // Two cases: source complete (all tokens obtained), source incomplete: if (null != partial) { // (else yikes!) BasicDBObject partialStatus = (BasicDBObject) partial.get(SourcePojo.harvest_); if (null != partialStatus) { // (else yikes!) int nTokensComplete = partialStatus.getInt(SourceHarvestStatusPojo.distributionTokensComplete_, 0); // (note after increment) // COMPLETE: reset parameters, status -> error (if anything has errored), success (all done), success_iteration (more to do) if (nTokensComplete == source.getDistributionFactor()) { if (!source.reachedMaxDocs()) { // (Can only do this if we've finished the source... //...else the different threads can be at different points, so the most recent doc for one thread might be // before the most recent doc of another) setClause.put(SourceHarvestStatusPojo.sourceQuery_distributedLastCompletedCycle_, new Date()); } setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionTokensComplete_, 0); setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionTokensFree_, source.getDistributionFactor()); setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionReachedLimit_, false); // (resetting this) // This source is now complete String status = partialStatus.getString(SourceHarvestStatusPojo.harvest_status_, null); Boolean reachedLimit = partialStatus.getBoolean( SourceHarvestStatusPojo.distributionReachedLimit_, false) || source.reachedMaxDocs(); if ((null != status) && ((status.equalsIgnoreCase(HarvestEnum.error.toString()) || (HarvestEnum.error == harvestStatus)))) { setClause.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_, HarvestEnum.error.toString()); } //TESTED (current and previous state == error) else if (reachedLimit || (HarvestEnum.success_iteration == harvestStatus)) { setClause.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_, HarvestEnum.success_iteration.toString()); } //TESTED (from previous or current state) // (else leave with default of success) //DEBUG //System.out.println(Thread.currentThread().getName() + " COMPLETE_SRC COMPLETE_TOKEN=" + source.getKey() + " / " + setClause.toString() + " / " + incClause.toString() + " / " + nTokensComplete); return true; } //TESTED else { // Not complete // If we're here then we're only allowed to update the status to error if (HarvestEnum.error != harvestStatus) { setClause.remove(SourceHarvestStatusPojo.sourceQuery_harvest_status_); } //TESTED if (source.reachedMaxDocs()) { 
setClause.put(SourceHarvestStatusPojo.sourceQuery_distributionReachedLimit_, true); } //TESTED //DEBUG //System.out.println(Thread.currentThread().getName() + " COMPLETE_TOKEN=" + source.getKey() + " / " + setClause.toString() + " / " + incClause.toString() + " / " + nTokensComplete); return false; } //(end is complete or not) //TESTED (reached max limit) } //(end found partial source status, else catastrophic failure) } //(end found partial source, else catastrophic failure) return false; }
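Here getString is called with a null default, which distinguishes "field not present" from any real value such as "". A small sketch of that sentinel pattern (the key names follow the example, but the document is made up):

import com.mongodb.BasicDBObject;

public class NullDefaultCheck {
    public static void main(String[] args) {
        BasicDBObject partialStatus = new BasicDBObject("distributionTokensComplete", 2);

        // A null default makes "not present" explicit, unlike "" which could be a legitimate value.
        String status = partialStatus.getString("harvest_status", null);
        if (status != null && status.equalsIgnoreCase("error")) {
            System.out.println("previous cycle errored, keep the error status");
        } else {
            System.out.println("no recorded status yet: " + status); // null
        }
    }
}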
From source file:com.ikanow.infinit.e.harvest.extraction.document.logstash.LogstashHarvester.java
License:Open Source License
@Override public void executeHarvest(HarvestContext context, SourcePojo source, List<DocumentPojo> toAdd, List<DocumentPojo> toUpdate, List<DocumentPojo> toRemove) { if (ElasticSearchManager.getVersion() < 100) { context.getHarvestStatus().update(source, new Date(), HarvestEnum.error, "This version of infinit.e (elasticsearch version < 1.0) does not support logstash, you will need to upgrade to v0.3 and ensure your elasticsearch instance is upgraded.", true, false);/*from w w w .java2s . com*/ return; } if (context.isStandalone()) { // test mode // Get the configuration String logStashConfig = null; SourcePipelinePojo logstashElement = null; for (SourcePipelinePojo pxPipe : source.getProcessingPipeline()) { /// (must be non null if here) if (null != pxPipe.logstash) { logStashConfig = pxPipe.logstash.config; logstashElement = pxPipe; } break; } if ((null == logStashConfig) || logStashConfig.isEmpty()) { context.getHarvestStatus().update(source, new Date(), HarvestEnum.error, "Logstash internal logic error, no configuration", true, false); return; } // Admin check (passed down) boolean isAdmin = AuthUtils.isAdmin(source.getOwnerId()); // Perform the request ObjectId requestId = new ObjectId(); BasicDBObject logQ = new BasicDBObject("_id", requestId); boolean removeJobWhenDone = true; try { // (See records.service for the programmatic definition of this message) logstashElement.logstash.config = logStashConfig; BasicDBObject logStashDbo = (BasicDBObject) logstashElement.toDb(); logStashDbo.put("_id", requestId); logStashDbo.put("maxDocs", context.getStandaloneMaxDocs()); logStashDbo.put("sourceKey", source.getKey()); logStashDbo.put("isAdmin", isAdmin); // Step 0: place request on Q DbManager.getIngest().getLogHarvesterQ().save(logStashDbo); // Step 1: has my request been serviced: boolean serviced = false; String error = null; final int WAIT_TIME_2_MINS = 120; for (int time = 0; time < WAIT_TIME_2_MINS; time += 5) { // (allow 2 minutes for servicing) //1. have i been removed from queue? //2. check size of logstash queue - is it decreasing try { Thread.sleep(5000); // check every 5s logStashDbo = (BasicDBObject) DbManager.getIngest().getLogHarvesterQ().findOne(logQ); if (null == logStashDbo) { removeJobWhenDone = false; serviced = true; break; // found! } //TESTED error = logStashDbo.getString("error", null); if (null != error) { break; // bad! 
} //TESTED } catch (Exception e) { } } if (!serviced) { DbManager.getIngest().getLogHarvesterQ().remove(logQ); removeJobWhenDone = false; if (null == error) { context.getHarvestStatus().update(source, new Date(), HarvestEnum.error, "Logstash service appears not to be running", true, false); } else { context.getHarvestStatus().update(source, new Date(), HarvestEnum.error, "Logstash service reports error: " + error, true, false); } //TESTED return; } //TESTED // Step 2: get data from the queue final int WAIT_TIME_5_MINS = 300; for (int time = 0; time < WAIT_TIME_5_MINS; time += 5) { // (allow 5 minutes for processing) logStashDbo = (BasicDBObject) DbManager.getIngest().getLogHarvesterQ().findOne(logQ); if (null != logStashDbo) { // if it reappears then there's been an error so handle and exit DbManager.getIngest().getLogHarvesterQ().remove(logQ); removeJobWhenDone = false; long count = DbManager.getCollection("ingest", requestId.toString()).count(); if (count > 0) { DBCursor dbc = DbManager.getCollection("ingest", requestId.toString()).find() .limit(context.getStandaloneMaxDocs()); for (Object o : dbc) { DocumentPojo doc = new DocumentPojo(); doc.addToMetadata("record", o); toAdd.add(doc); } error = logStashDbo.getString("error", "no info"); context.getHarvestStatus().update(source, new Date(), HarvestEnum.success, "Logstash service info: " + error, false, false); break; } //TESTED else { // Then it's an error: error = logStashDbo.getString("error", null); if (error == null) { if (0 == context.getStandaloneMaxDocs()) { context.getHarvestStatus().update(source, new Date(), HarvestEnum.success, "Logstash service info: success", false, false); break; } else { error = "unknown error"; } } //TESTED context.getHarvestStatus().update(source, new Date(), HarvestEnum.error, "Logstash service reports error: " + error, true, false); return; } //TESTED } //TESTED try { Thread.sleep(5000); // check every 5s } catch (Exception e) { } } // (end loop while waiting for docs) } finally { // just to be on the safe side... if (removeJobWhenDone) { DbManager.getIngest().getLogHarvesterQ().remove(logQ); } try { DbManager.getCollection("ingest", requestId.toString()).drop(); } catch (Exception e) { } // that's fine it just doesn't exist } } else { context.getHarvestStatus().update(source, new Date(), HarvestEnum.error, "Tried to harvest logstash data internally", true, false); return; } }
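The harvester above uses both flavours of default: null as a sentinel while polling ("no error yet") and the placeholder "no info" when the value is dropped straight into a status message. A compact sketch, with a fabricated queue document:

import com.mongodb.BasicDBObject;

public class DisplayDefault {
    public static void main(String[] args) {
        BasicDBObject logStashDbo = new BasicDBObject(); // queue entry came back without an "error" field

        // Sentinel style: null means "no error reported".
        String error = logStashDbo.getString("error", null);
        System.out.println(error == null ? "still waiting" : "failed: " + error);

        // Display style: a human-readable placeholder when the value feeds a status message directly.
        System.out.println("Logstash service info: " + logStashDbo.getString("error", "no info"));
    }
}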
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License:Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" }) public String runHadoopJob(CustomMapReduceJobPojo job, String tempJarLocation) throws IOException, SAXException, ParserConfigurationException { StringWriter xml = new StringWriter(); String outputCollection = job.outputCollectionTemp;// (non-append mode) if ((null != job.appendResults) && job.appendResults) outputCollection = job.outputCollection; // (append mode, write directly in....) else if (null != job.incrementalMode) job.incrementalMode = false; // (not allowed to be in incremental mode and not update mode) createConfigXML(xml, job.jobtitle, job.inputCollection, InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.INPUTFIELDS), job.isCustomTable, job.getOutputDatabase(), job._id.toString(), outputCollection, job.mapper, job.reducer, job.combiner,/*from ww w . j ava2 s . co m*/ InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.QUERY), job.communityIds, job.outputKey, job.outputValue, job.arguments, job.incrementalMode, job.submitterID, job.selfMerge, job.outputCollection, job.appendResults); ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader(); URLClassLoader child = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() }, savedClassLoader); Thread.currentThread().setContextClassLoader(child); // Check version: for now, any infinit.e.data_model with an VersionTest class is acceptable boolean dataModelLoaded = true; try { URLClassLoader versionTest = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() }, null); try { Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, versionTest); } catch (ClassNotFoundException e2) { //(this is fine, will use the cached version) dataModelLoaded = false; } if (dataModelLoaded) Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoVersionTest", true, versionTest); } catch (ClassNotFoundException e1) { throw new RuntimeException( "This JAR is compiled with too old a version of the data-model, please recompile with Jan 2014 (rc2) onwards"); } // Now load the XML into a configuration object: Configuration config = new Configuration(); // Add the client configuration overrides: if (!bLocalMode) { String hadoopConfigPath = props_custom.getHadoopConfigPath() + "/hadoop/"; config.addResource(new Path(hadoopConfigPath + "core-site.xml")); config.addResource(new Path(hadoopConfigPath + "mapred-site.xml")); config.addResource(new Path(hadoopConfigPath + "hadoop-site.xml")); } //TESTED try { DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(new ByteArrayInputStream(xml.toString().getBytes())); NodeList nList = doc.getElementsByTagName("property"); for (int temp = 0; temp < nList.getLength(); temp++) { Node nNode = nList.item(temp); if (nNode.getNodeType() == Node.ELEMENT_NODE) { Element eElement = (Element) nNode; String name = getTagValue("name", eElement); String value = getTagValue("value", eElement); if ((null != name) && (null != value)) { config.set(name, value); } } } } catch (Exception e) { throw new IOException(e.getMessage()); } // Some other config defaults: // (not sure if these are actually applied, or derived from the defaults - for some reason they don't appear in CDH's client config) config.set("mapred.map.tasks.speculative.execution", "false"); config.set("mapred.reduce.tasks.speculative.execution", "false"); // 
(default security is ignored here, have it set via HADOOP_TASKTRACKER_CONF in cloudera) // Now run the JAR file try { BasicDBObject advancedConfigurationDbo = null; try { advancedConfigurationDbo = (null != job.query) ? ((BasicDBObject) com.mongodb.util.JSON.parse(job.query)) : (new BasicDBObject()); } catch (Exception e) { advancedConfigurationDbo = new BasicDBObject(); } boolean esMode = advancedConfigurationDbo.containsField("qt") && !job.isCustomTable; if (esMode && !job.inputCollection.equals("doc_metadata.metadata")) { throw new RuntimeException( "Infinit.e Queries are only supported on doc_metadata - use MongoDB queries instead."); } config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing) if (bLocalMode) { // local job tracker and FS mode config.set("mapred.job.tracker", "local"); config.set("fs.default.name", "local"); } else { if (bTestMode) { // run job tracker locally but FS mode remotely config.set("mapred.job.tracker", "local"); } else { // normal job tracker String trackerUrl = HadoopUtils.getXMLProperty( props_custom.getHadoopConfigPath() + "/hadoop/mapred-site.xml", "mapred.job.tracker"); config.set("mapred.job.tracker", trackerUrl); } String fsUrl = HadoopUtils.getXMLProperty( props_custom.getHadoopConfigPath() + "/hadoop/core-site.xml", "fs.default.name"); config.set("fs.default.name", fsUrl); } if (!dataModelLoaded && !(bTestMode || bLocalMode)) { // If running distributed and no data model loaded then add ourselves Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/", "infinit.e.data_model.jar", config); DistributedCache.addFileToClassPath(jarToCache, config); jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/", "infinit.e.processing.custom.library.jar", config); DistributedCache.addFileToClassPath(jarToCache, config); } //TESTED // Debug scripts (only if they exist), and only in non local/test mode if (!bLocalMode && !bTestMode) { try { Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/", "custom_map_error_handler.sh", config); config.set("mapred.map.task.debug.script", "custom_map_error_handler.sh " + job.jobtitle); config.set("mapreduce.map.debug.script", "custom_map_error_handler.sh " + job.jobtitle); DistributedCache.createSymlink(config); DistributedCache.addCacheFile(scriptToCache.toUri(), config); } catch (Exception e) { } // just carry on try { Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/", "custom_reduce_error_handler.sh", config); config.set("mapred.reduce.task.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle); config.set("mapreduce.reduce.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle); DistributedCache.createSymlink(config); DistributedCache.addCacheFile(scriptToCache.toUri(), config); } catch (Exception e) { } // just carry on } //TODO (???): TOTEST // (need to do these 2 things here before the job is created, at which point the config class has been copied across) //1) Class<?> mapperClazz = Class.forName(job.mapper, true, child); if (ICustomInfiniteInternalEngine.class.isAssignableFrom(mapperClazz)) { // Special case: internal custom engine, so gets an additional integration hook ICustomInfiniteInternalEngine preActivities = (ICustomInfiniteInternalEngine) mapperClazz .newInstance(); preActivities.preTaskActivities(job._id, job.communityIds, config, !(bTestMode || bLocalMode)); } //TESTED //2) if 
(job.inputCollection.equalsIgnoreCase("file.binary_shares")) { // Need to download the GridFSZip file try { Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/unbundled/", "GridFSZipFile.jar", config); DistributedCache.addFileToClassPath(jarToCache, config); } catch (Throwable t) { } // (this is fine, will already be on the classpath .. otherwise lots of other stuff will be failing all over the place!) } if (job.inputCollection.equals("records")) { InfiniteElasticsearchHadoopUtils.handleElasticsearchInput(job, config, advancedConfigurationDbo); //(won't run under 0.19 so running with "records" should cause all sorts of exceptions) } //TESTED (by hand) if (bTestMode || bLocalMode) { // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec"); } // Manually specified caches List<URL> localJarCaches = InfiniteHadoopUtils.handleCacheList(advancedConfigurationDbo.get("$caches"), job, config, props_custom); Job hj = new Job(config); // (NOTE: from here, changes to config are ignored) try { if (null != localJarCaches) { if (bLocalMode || bTestMode) { Method method = URLClassLoader.class.getDeclaredMethod("addURL", new Class[] { URL.class }); method.setAccessible(true); method.invoke(child, localJarCaches.toArray()); } //TOTEST (tested logically) } Class<?> classToLoad = Class.forName(job.mapper, true, child); hj.setJarByClass(classToLoad); if (job.inputCollection.equalsIgnoreCase("filesystem")) { String inputPath = null; try { inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url"); if (!inputPath.endsWith("/")) { inputPath = inputPath + "/"; } } catch (Exception e) { } if (null == inputPath) { throw new RuntimeException("Must specify 'file.url' if reading from filesystem."); } inputPath = InfiniteHadoopUtils.authenticateInputDirectory(job, inputPath); InfiniteFileInputFormat.addInputPath(hj, new Path(inputPath + "*/*")); // (that extra bit makes it recursive) InfiniteFileInputFormat.setMaxInputSplitSize(hj, 33554432); // (32MB) InfiniteFileInputFormat.setInfiniteInputPathFilter(hj, config); hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteFileInputFormat", true, child)); } else if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) { String[] oidStrs = null; try { String inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url"); Pattern oidExtractor = Pattern.compile("inf://share/([^/]+)"); Matcher m = oidExtractor.matcher(inputPath); if (m.find()) { oidStrs = m.group(1).split("\\s*,\\s*"); } else { throw new RuntimeException( "file.url must be in format inf://share/<oid-list>/<string>: " + inputPath); } InfiniteHadoopUtils.authenticateShareList(job, oidStrs); } catch (Exception e) { throw new RuntimeException( "Authentication error: " + e.getMessage() + ": " + advancedConfigurationDbo, e); } hj.getConfiguration().setStrings("mapred.input.dir", oidStrs); hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteShareInputFormat", true, child)); } else if (job.inputCollection.equals("records")) { hj.setInputFormatClass((Class<? extends InputFormat>) Class .forName("com.ikanow.infinit.e.data_model.custom.InfiniteEsInputFormat", true, child)); } else { if (esMode) { hj.setInputFormatClass((Class<? 
extends InputFormat>) Class.forName( "com.ikanow.infinit.e.processing.custom.utils.InfiniteElasticsearchMongoInputFormat", true, child)); } else { hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, child)); } } if ((null != job.exportToHdfs) && job.exportToHdfs) { //TODO (INF-2469): Also, if the output key is BSON then also run as text (but output as JSON?) Path outPath = InfiniteHadoopUtils.ensureOutputDirectory(job, props_custom); if ((null != job.outputKey) && (null != job.outputValue) && job.outputKey.equalsIgnoreCase("org.apache.hadoop.io.text") && job.outputValue.equalsIgnoreCase("org.apache.hadoop.io.text")) { // (slight hack before I sort out the horrendous job class - if key/val both text and exporting to HDFS then output as Text) hj.setOutputFormatClass((Class<? extends OutputFormat>) Class .forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat", true, child)); TextOutputFormat.setOutputPath(hj, outPath); } //TESTED else { hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName( "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", true, child)); SequenceFileOutputFormat.setOutputPath(hj, outPath); } //TESTED } else { // normal case, stays in MongoDB hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName( "com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat", true, child)); } hj.setMapperClass((Class<? extends Mapper>) mapperClazz); String mapperOutputKeyOverride = advancedConfigurationDbo.getString("$mapper_key_class", null); if (null != mapperOutputKeyOverride) { hj.setMapOutputKeyClass(Class.forName(mapperOutputKeyOverride)); } //TESTED String mapperOutputValueOverride = advancedConfigurationDbo.getString("$mapper_value_class", null); if (null != mapperOutputValueOverride) { hj.setMapOutputValueClass(Class.forName(mapperOutputValueOverride)); } //TESTED if ((null != job.reducer) && !job.reducer.startsWith("#") && !job.reducer.equalsIgnoreCase("null") && !job.reducer.equalsIgnoreCase("none")) { hj.setReducerClass((Class<? extends Reducer>) Class.forName(job.reducer, true, child)); // Variable reducers: if (null != job.query) { try { hj.setNumReduceTasks(advancedConfigurationDbo.getInt("$reducers", 1)); } catch (Exception e) { try { // (just check it's not a string that is a valid int) hj.setNumReduceTasks( Integer.parseInt(advancedConfigurationDbo.getString("$reducers", "1"))); } catch (Exception e2) { } } } //TESTED } else { hj.setNumReduceTasks(0); } if ((null != job.combiner) && !job.combiner.startsWith("#") && !job.combiner.equalsIgnoreCase("null") && !job.combiner.equalsIgnoreCase("none")) { hj.setCombinerClass((Class<? extends Reducer>) Class.forName(job.combiner, true, child)); } hj.setOutputKeyClass(Class.forName(job.outputKey, true, child)); hj.setOutputValueClass(Class.forName(job.outputValue, true, child)); hj.setJobName(job.jobtitle); currJobName = job.jobtitle; } catch (Error e) { // (messing about with class loaders = lots of chances for errors!) 
throw new RuntimeException(e.getMessage(), e); } if (bTestMode || bLocalMode) { hj.submit(); currThreadId = null; Logger.getRootLogger().addAppender(this); currLocalJobId = hj.getJobID().toString(); currLocalJobErrs.setLength(0); while (!hj.isComplete()) { Thread.sleep(1000); } Logger.getRootLogger().removeAppender(this); if (hj.isSuccessful()) { if (this.currLocalJobErrs.length() > 0) { return "local_done: " + this.currLocalJobErrs.toString(); } else { return "local_done"; } } else { return "Error: " + this.currLocalJobErrs.toString(); } } else { hj.submit(); String jobId = hj.getJobID().toString(); return jobId; } } catch (Exception e) { e.printStackTrace(); Thread.currentThread().setContextClassLoader(savedClassLoader); return "Error: " + InfiniteHadoopUtils.createExceptionMessage(e); } finally { Thread.currentThread().setContextClassLoader(savedClassLoader); } }
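A detail from the launcher above: "$reducers" may be stored as a number or as a quoted string, so getInt is tried first and getString(key, "1") plus Integer.parseInt is the fallback. A reduced sketch of that dual-read pattern (the inlined JSON is illustrative):

import com.mongodb.BasicDBObject;

public class NumericFallback {
    public static void main(String[] args) {
        // "$reducers" may arrive as a number or as a quoted string depending on who built the query.
        BasicDBObject advancedConfig = (BasicDBObject) com.mongodb.util.JSON.parse("{\"$reducers\":\"4\"}");

        int reducers;
        try {
            reducers = advancedConfig.getInt("$reducers", 1); // works when the value is numeric
        } catch (Exception e) {
            // getString + parseInt covers the quoted-string case, mirroring the launcher above.
            reducers = Integer.parseInt(advancedConfig.getString("$reducers", "1"));
        }
        System.out.println("reducers = " + reducers); // 4
    }
}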
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License:Open Source License
private void createConfigXML(Writer out, String title, String input, String fields, boolean isCustomTable, String outputDatabase, String output, String tempOutputCollection, String mapper, String reducer, String combiner, String query, List<ObjectId> communityIds, String outputKey, String outputValue, String arguments, Boolean incrementalMode, ObjectId userId, Boolean selfMerge, String originalOutputCollection, Boolean appendResults) throws IOException { String dbserver = prop_general.getDatabaseServer(); output = outputDatabase + "." + tempOutputCollection; boolean isAdmin = AuthUtils.isAdmin(userId); int nSplits = 8; int nDocsPerSplit = 12500; //add communities to query if this is not a custom table BasicDBObject oldQueryObj = null; BasicDBObject srcTags = null;/*from ww w . j a v a 2 s. c o m*/ // Start with the old query: if (query.startsWith("{")) { oldQueryObj = (BasicDBObject) com.mongodb.util.JSON.parse(query); } else { oldQueryObj = new BasicDBObject(); } boolean elasticsearchQuery = oldQueryObj.containsField("qt") && !isCustomTable; int nLimit = 0; if (oldQueryObj.containsField("$limit")) { nLimit = oldQueryObj.getInt("$limit"); oldQueryObj.remove("$limit"); } if (oldQueryObj.containsField("$splits")) { nSplits = oldQueryObj.getInt("$splits"); oldQueryObj.remove("$splits"); } if (oldQueryObj.containsField("$srctags")) { srcTags = new BasicDBObject(SourcePojo.tags_, oldQueryObj.get("$srctags")); oldQueryObj.remove("$srctags"); } if (bLocalMode) { // If in local mode, then set this to a large number so we always run inside our limit/split version // (since for some reason MongoInputFormat seems to fail on large collections) nSplits = InfiniteMongoSplitter.MAX_SPLITS; } if (oldQueryObj.containsField("$docsPerSplit")) { nDocsPerSplit = oldQueryObj.getInt("$docsPerSplit"); oldQueryObj.remove("$docsPerSplit"); } oldQueryObj.remove("$fields"); oldQueryObj.remove("$output"); oldQueryObj.remove("$reducers"); String mapperKeyClass = oldQueryObj.getString("$mapper_key_class", ""); String mapperValueClass = oldQueryObj.getString("$mapper_value_class", ""); oldQueryObj.remove("$mapper_key_class"); oldQueryObj.remove("$mapper_value_class"); String cacheList = null; Object cacheObj = oldQueryObj.get("$caches"); if (null != cacheObj) { cacheList = cacheObj.toString(); // (either array of strings, or single string) if (!cacheList.startsWith("[")) { cacheList = "[" + cacheList + "]"; // ("must" now be valid array) } oldQueryObj.remove("$caches"); } //TESTED if (null != nDebugLimit) { // (debug mode override) nLimit = nDebugLimit; } boolean tmpIncMode = (null != incrementalMode) && incrementalMode; Date fromOverride = null; Date toOverride = null; Object fromOverrideObj = oldQueryObj.remove("$tmin"); Object toOverrideObj = oldQueryObj.remove("$tmax"); if (null != fromOverrideObj) { fromOverride = InfiniteHadoopUtils.dateStringFromObject(fromOverrideObj, true); } if (null != toOverrideObj) { toOverride = InfiniteHadoopUtils.dateStringFromObject(toOverrideObj, false); } if (!isCustomTable) { if (elasticsearchQuery) { oldQueryObj.put("communityIds", communityIds); //tmin/tmax not supported - already have that capability as part of the query } else { if (input.equals("feature.temporal")) { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("value.maxTime", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, true)); } //TESTED oldQueryObj.put("_id.c", new BasicDBObject(DbManager.in_, communityIds)); } else { oldQueryObj.put(DocumentPojo.communityId_, new 
BasicDBObject(DbManager.in_, communityIds)); if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED if (input.equals("doc_metadata.metadata")) { oldQueryObj.put(DocumentPojo.index_, new BasicDBObject(DbManager.ne_, "?DEL?")); // (ensures not soft-deleted) } } } } else { if ((null != fromOverride) || (null != toOverride)) { oldQueryObj.put("_id", InfiniteHadoopUtils.createDateRange(fromOverride, toOverride, false)); } //TESTED //get the custom table (and database) input = CustomOutputManager.getCustomDbAndCollection(input); } query = oldQueryObj.toString(); if (arguments == null) arguments = ""; // Generic configuration out.write("<?xml version=\"1.0\"?>\n<configuration>"); // Mongo specific configuration out.write("\n\t<property><!-- name of job shown in jobtracker --><name>mongo.job.name</name><value>" + title + "</value></property>" + "\n\t<property><!-- run the job verbosely ? --><name>mongo.job.verbose</name><value>true</value></property>" + "\n\t<property><!-- Run the job in the foreground and wait for response, or background it? --><name>mongo.job.background</name><value>false</value></property>" + "\n\t<property><!-- If you are reading from mongo, the URI --><name>mongo.input.uri</name><value>mongodb://" + dbserver + "/" + input + "</value></property>" + "\n\t<property><!-- If you are writing to mongo, the URI --><name>mongo.output.uri</name><value>mongodb://" + dbserver + "/" + output + "</value> </property>" + "\n\t<property><!-- The query, in JSON, to execute [OPTIONAL] --><name>mongo.input.query</name><value>" + StringEscapeUtils.escapeXml(query) + "</value></property>" + "\n\t<property><!-- The fields, in JSON, to read [OPTIONAL] --><name>mongo.input.fields</name><value>" + ((fields == null) ? ("") : fields) + "</value></property>" + "\n\t<property><!-- A JSON sort specification for read [OPTIONAL] --><name>mongo.input.sort</name><value></value></property>" + "\n\t<property><!-- The number of documents to limit to for read [OPTIONAL] --><name>mongo.input.limit</name><value>" + nLimit + "</value><!-- 0 == no limit --></property>" + "\n\t<property><!-- The number of documents to skip in read [OPTIONAL] --><!-- TODO - Are we running limit() or skip() first? 
--><name>mongo.input.skip</name><value>0</value> <!-- 0 == no skip --></property>" + "\n\t<property><!-- Class for the mapper --><name>mongo.job.mapper</name><value>" + mapper + "</value></property>" + "\n\t<property><!-- Reducer class --><name>mongo.job.reducer</name><value>" + reducer + "</value></property>" + "\n\t<property><!-- InputFormat Class --><name>mongo.job.input.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat</value></property>" + "\n\t<property><!-- OutputFormat Class --><name>mongo.job.output.format</name><value>com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat</value></property>" + "\n\t<property><!-- Output key class for the output format --><name>mongo.job.output.key</name><value>" + outputKey + "</value></property>" + "\n\t<property><!-- Output value class for the output format --><name>mongo.job.output.value</name><value>" + outputValue + "</value></property>" + "\n\t<property><!-- Output key class for the mapper [optional] --><name>mongo.job.mapper.output.key</name><value>" + mapperKeyClass + "</value></property>" + "\n\t<property><!-- Output value class for the mapper [optional] --><name>mongo.job.mapper.output.value</name><value>" + mapperValueClass + "</value></property>" + "\n\t<property><!-- Class for the combiner [optional] --><name>mongo.job.combiner</name><value>" + combiner + "</value></property>" + "\n\t<property><!-- Partitioner class [optional] --><name>mongo.job.partitioner</name><value></value></property>" + "\n\t<property><!-- Sort Comparator class [optional] --><name>mongo.job.sort_comparator</name><value></value></property>" + "\n\t<property><!-- Split Size [optional] --><name>mongo.input.split_size</name><value>32</value></property>"); // Infinit.e specific configuration out.write("\n\t<property><!-- User Arguments [optional] --><name>infinit.e.userid</name><value>" + StringEscapeUtils.escapeXml(userId.toString()) + "</value></property>" + "\n\t<property><!-- User Arguments [optional] --><name>arguments</name><value>" + StringEscapeUtils.escapeXml(arguments) + "</value></property>" + "\n\t<property><!-- Maximum number of splits [optional] --><name>max.splits</name><value>" + nSplits + "</value></property>" + "\n\t<property><!-- Maximum number of docs per split [optional] --><name>max.docs.per.split</name><value>" + nDocsPerSplit + "</value></property>" + "\n\t<property><!-- Infinit.e incremental mode [optional] --><name>update.incremental</name><value>" + tmpIncMode + "</value></property>" + "\n\t<property><!-- Infinit.e quick admin check [optional] --><name>infinit.e.is.admin</name><value>" + isAdmin + "</value></property>" + "\n\t<property><!-- Infinit.e userid [optional] --><name>infinit.e.userid</name><value>" + userId + "</value></property>"); if (null != cacheList) { out.write( "\n\t<property><!-- Infinit.e cache list [optional] --><name>infinit.e.cache.list</name><value>" + cacheList + "</value></property>"); } //TESTED if (null != srcTags) { out.write( "\n\t<property><!-- Infinit.e src tags filter [optional] --><name>infinit.e.source.tags.filter</name><value>" + srcTags.toString() + "</value></property>"); } if (null != selfMerge && selfMerge && originalOutputCollection != null) { originalOutputCollection = "mongodb://" + dbserver + "/" + outputDatabase + "." 
+ originalOutputCollection; out.write( "\n\t<property><!-- This jobs output collection for passing into the mapper along with input collection [optional] --><name>infinit.e.selfMerge</name><value>" + originalOutputCollection + "</value></property>"); } // Closing thoughts: out.write("\n</configuration>"); out.flush(); out.close(); }
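In createConfigXML above, the "" defaults for "$mapper_key_class" / "$mapper_value_class" matter because the values are concatenated straight into generated XML: a missing override becomes an empty <value></value> element rather than the literal text "null". A minimal sketch of that idea (the property name is copied from the example, the rest is illustrative):

import com.mongodb.BasicDBObject;

public class EmptyDefaultForConfig {
    public static void main(String[] args) {
        BasicDBObject oldQueryObj = new BasicDBObject(); // no "$mapper_key_class" override supplied

        // "" keeps the generated element empty instead of emitting the string "null".
        String mapperKeyClass = oldQueryObj.getString("$mapper_key_class", "");
        System.out.println("<property><name>mongo.job.mapper.output.key</name><value>"
                + mapperKeyClass + "</value></property>");
    }
}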
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomSavedQueryQueueLauncher.java
License:Open Source License
public static void createAlertDocSummary(StringBuffer alertEmailText, int docNum, int numDocSummaries, BasicDBObject doc, String rootUrl) { if (docNum < numDocSummaries) { // Preamble on the first doc if (0 == docNum) { alertEmailText.append("<p>"); alertEmailText.append("Top ").append(numDocSummaries); if (0 == numDocSummaries) { alertEmailText.append(" document:"); } else { alertEmailText.append(" documents:"); }/*from ww w . j av a 2 s . c o m*/ alertEmailText.append("</p>"); alertEmailText.append("\n"); alertEmailText.append("<ol>"); alertEmailText.append("\n"); } // Docs: StringBuffer guiQuery = new StringBuffer("{\"qt\":[{\"ftext\":\"_id:") .append(doc.getObjectId(DocumentPojo._id_)).append("\"}]}"); String url = doc.getString(DocumentPojo.displayUrl_, doc.getString(DocumentPojo.url_)); String title = doc.getString(DocumentPojo.title_, url); alertEmailText.append("<li/>"); alertEmailText.append(title); alertEmailText.append(" ["); alertEmailText.append(doc.getDate(DocumentPojo.publishedDate_, doc.getDate(DocumentPojo.created_))); alertEmailText.append("]"); alertEmailText.append(" ("); alertEmailText.append("<a href=\"").append(rootUrl); try { alertEmailText.append("?query="); alertEmailText.append(URLEncoder.encode(guiQuery.toString(), "UTF-8")); alertEmailText.append("&communityIds=").append( doc.getObjectId(DocumentPojo.communityId_, new ObjectId("4c927585d591d31d7b37097a"))); } catch (Exception e) { } // (just carry on) alertEmailText.append("\">"); alertEmailText.append("GUI</a>)"); if ((null != url) && (url.startsWith("http"))) { alertEmailText.append(" ("); alertEmailText.append("<a href=\"").append(url).append("\">"); alertEmailText.append("External Link</a>)"); } alertEmailText.append("\n"); } }
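The alert builder above nests the calls so that one field acts as the default for another: displayUrl falls back to url, and title falls back to whichever URL survived. A self-contained sketch of that chained-default pattern (document contents are made up):

import com.mongodb.BasicDBObject;

public class ChainedDefaults {
    public static void main(String[] args) {
        BasicDBObject doc = new BasicDBObject("url", "http://example.com/doc/42");
        // no "displayUrl" and no "title" in this document

        // Fall back field-by-field: displayUrl -> url, then title -> whichever URL survived.
        String url = doc.getString("displayUrl", doc.getString("url"));
        String title = doc.getString("title", url);

        System.out.println(url);   // http://example.com/doc/42
        System.out.println(title); // http://example.com/doc/42
    }
}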
From source file:com.ikanow.infinit.e.processing.custom.utils.CustomApiUtils.java
License:Apache License
public static void getJobResults(ResponsePojo rp, CustomMapReduceJobPojo cmr, int limit, String fields, String findStr, String sortStr, boolean bCsv) { BasicDBObject queryDbo = null;// w w w . ja v a 2 s. c o m if (null != findStr) { queryDbo = (BasicDBObject) com.mongodb.util.JSON.parse(findStr); } else { queryDbo = new BasicDBObject(); } //TOTEST BasicDBObject fieldsDbo = new BasicDBObject(); if (null != fields) { fieldsDbo = (BasicDBObject) com.mongodb.util.JSON.parse("{" + fields + "}"); } //return the results: // Need to handle sorting... BasicDBObject sort = null; if (null != sortStr) { //override sort = (BasicDBObject) com.mongodb.util.JSON.parse(sortStr); } else { //defaults String sortField = "_id"; int sortDir = 1; BasicDBObject postProcObject = (BasicDBObject) com.mongodb.util.JSON.parse( InfiniteHadoopUtils.getQueryOrProcessing(cmr.query, InfiniteHadoopUtils.QuerySpec.POSTPROC)); if (postProcObject != null) { sortField = postProcObject.getString("sortField", "_id"); sortDir = postProcObject.getInt("sortDirection", 1); } //TESTED (post proc and no post proc) sort = new BasicDBObject(sortField, sortDir); } //TOTEST // Case 1: DB rp.setResponse(new ResponseObject("Custom Map Reduce Job Results", true, "Map reduce job completed at: " + cmr.lastCompletionTime)); if ((null == cmr.exportToHdfs) || !cmr.exportToHdfs) { DBCursor resultCursor = null; DBCollection coll = DbManager.getCollection(cmr.getOutputDatabase(), cmr.outputCollection); DBDecoderFactory defaultDecoder = coll.getDBDecoderFactory(); CsvGeneratingBsonDecoder csvDecoder = null; SizeReportingBasicBSONDecoder sizeDecoder = null; CustomMapReduceResultPojo cmrr = new CustomMapReduceResultPojo(); try { if (bCsv) { coll.setDBDecoderFactory((csvDecoder = new CsvGeneratingBsonDecoder())); } else { coll.setDBDecoderFactory((sizeDecoder = new SizeReportingBasicBSONDecoder())); } if (limit > 0) { resultCursor = coll.find(queryDbo, fieldsDbo).sort(sort).limit(limit); } else { resultCursor = coll.find(queryDbo, fieldsDbo).sort(sort); } LinkedList<BasicDBObject> list = null; if (!bCsv) { list = new LinkedList<BasicDBObject>(); } final int MAX_SIZE_CSV = 80 * 1024 * 1024; //(80MB) final int MAX_SIZE_JSON = 80 * 1024 * 1024; //(80MB) while (resultCursor.hasNext()) { BasicDBObject x = (BasicDBObject) resultCursor.next(); if (!bCsv) { list.add(x); } if (null != csvDecoder) { if (csvDecoder.getCsv().length() > MAX_SIZE_CSV) { break; } } else if (null != sizeDecoder) { if (sizeDecoder.getSize() > MAX_SIZE_JSON) { break; } } } cmrr.results = list; } finally { coll.setDBDecoderFactory(defaultDecoder); } cmrr.lastCompletionTime = cmr.lastCompletionTime; if (null != csvDecoder) { StringBuffer header = new StringBuffer(); for (String field : csvDecoder.getOrderedFields()) { if (0 != header.length()) { header.append(','); } header.append('"'); header.append(field.replace("\"", "\\\"")); header.append("\""); } header.append('\n'); header.append(csvDecoder.getCsv().toString()); cmrr.results = header.toString(); } rp.setData(cmrr); } //TESTED else { // Case 2: HDFS if ((null != cmr.outputKey) && (null != cmr.outputValue) && cmr.outputKey.equalsIgnoreCase("org.apache.hadoop.io.text") && cmr.outputValue.equalsIgnoreCase("org.apache.hadoop.io.text")) { // special case, text file try { rp.setData(HadoopUtils.getBsonFromTextFiles(cmr, limit, fields), (BasePojoApiMap<BasicDBList>) null); } catch (Exception e) { rp.setResponse(new ResponseObject("Custom Map Reduce Job Results", false, "Files don't appear to be in text file format, did you run the job 
before changing the output to Text/Text?")); } } //TESTED else { // sequence file try { rp.setData(HadoopUtils.getBsonFromSequenceFile(cmr, limit, fields), (BasePojoApiMap<BasicDBList>) null); } catch (Exception e) { rp.setResponse(new ResponseObject("Custom Map Reduce Job Results", false, "Files don't appear to be in sequence file format, did you run the job with Text/Text?")); } } //TESTED } //TESTED }
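The results handler above derives its sort specification from an optional post-processing object, defaulting to _id ascending. A reduced sketch of that pattern (the config document is fabricated; in the original it comes from JSON.parse on the job's stored query):

import com.mongodb.BasicDBObject;

public class SortSpecFromConfig {
    public static void main(String[] args) {
        BasicDBObject postProcObject = new BasicDBObject("sortField", "count");

        String sortField = "_id";
        int sortDir = 1;
        if (postProcObject != null) {
            sortField = postProcObject.getString("sortField", "_id");
            sortDir = postProcObject.getInt("sortDirection", 1);
        }
        BasicDBObject sort = new BasicDBObject(sortField, sortDir);
        System.out.println(sort); // { "count" : 1 }
    }
}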
From source file:com.ikanow.infinit.e.processing.custom.utils.SourcePipelineToCustomConversion.java
License:Apache License
public static void convertSourcePipeline(SourcePojo in, List<CustomMapReduceJobPojo> out, boolean testNotCreateMode) { BasicDBObject query = new BasicDBObject(); BasicDBObject queryOutput = null; // (holds complex object) // Not sure if this will be string or JSON object.. StringBuffer args = null;/*from w w w .jav a 2 s . c o m*/ BasicDBObject argsJson = null; boolean haveInput = false; SourcePipelinePojo scorecard = new SourcePipelinePojo(); List<String> caches = new LinkedList<String>(); // Create a generic-ish set of fields for the job CustomMapReduceJobPojo job = handleInitializeOrGetJob(in, testNotCreateMode); // Now modify the fields based on the processing pipeline if (null != in.getProcessingPipeline()) for (SourcePipelinePojo px : in.getProcessingPipeline()) { if (null != px.custom_datastoreQuery) { if (haveInput) throw new RuntimeException("Currently only support one input block"); haveInput = true; job.isCustomTable = true; job.inputCollection = px.custom_datastoreQuery.customTable; query = handleCommonInFields(px.custom_datastoreQuery.query, px.custom_datastoreQuery.fields, px.custom_datastoreQuery.tmin, px.custom_datastoreQuery.tmax, null, null); } else if (null != px.custom_file) { // HDFS or Ikanow share if (haveInput) throw new RuntimeException("Currently only support one input block"); haveInput = true; SourcePojo temp = new SourcePojo(); temp.setFileConfig(px.custom_file); BasicDBObject fileObj = (BasicDBObject) temp.toDb().get(SourcePojo.file_); query = new BasicDBObject(SourcePojo.file_, fileObj); String url = fileObj.getString("url", "will_error_later"); if (url.startsWith("inf://share/")) { job.inputCollection = "file.binary_shares"; } else { fileObj.put("url", url.replace("hdfs:///", "/").replace("hdfs:", "")); // (get rid of leading hdfs:) job.inputCollection = "filesystem"; } } else if (null != px.docs_datastoreQuery) { if (haveInput) throw new RuntimeException("Currently only support one input block"); haveInput = true; if (ContentMode.content == px.docs_datastoreQuery.contentMode) { job.inputCollection = "doc_content.gzip_content"; } else if ((null == px.docs_datastoreQuery.contentMode) || (ContentMode.metadata == px.docs_datastoreQuery.contentMode)) { job.inputCollection = "doc_metadata.metadata"; } else { throw new RuntimeException( "Both content + metadata in the same job: not currently supported"); } query = handleCommonInFields(px.docs_datastoreQuery.query, px.docs_datastoreQuery.fields, px.docs_datastoreQuery.tmin, px.docs_datastoreQuery.tmax, px.docs_datastoreQuery.srcTags, null); } else if (null != px.docs_documentQuery) { if (haveInput) throw new RuntimeException("Currently only support one input block"); haveInput = true; job.inputCollection = "doc_metadata.metadata"; query = handleDocumentQuery(px.docs_documentQuery.query, in, job); } else if (null != px.records_indexQuery) { if (haveInput) throw new RuntimeException("Currently only support one input block"); haveInput = true; job.inputCollection = "records"; query = handleCommonInFields(null, null, px.records_indexQuery.tmin, px.records_indexQuery.tmax, null, new BasicDBObject()); if (null != px.records_indexQuery.query) { if (px.records_indexQuery.query.trim().startsWith("{")) { query.put("query", com.mongodb.util.JSON.parse(px.records_indexQuery.query)); } else { query.put("query", px.records_indexQuery.query); } } if (null != px.records_indexQuery.filter) { if (px.records_indexQuery.filter.trim().startsWith("{")) { query.put("filter", com.mongodb.util.JSON.parse(px.records_indexQuery.filter)); } 
                    else {
                        query.put("filter", px.records_indexQuery.filter);
                    }
                }
                if (null != px.records_indexQuery.types) {
                    query.put("$types", px.records_indexQuery.types);
                }
                if (null != px.records_indexQuery.streamingMode) {
                    if (StreamingMode.stashed == px.records_indexQuery.streamingMode) {
                        query.put("$streaming", false);
                    }
                    else if (StreamingMode.streaming == px.records_indexQuery.streamingMode) {
                        query.put("$streaming", true);
                    }
                    //(else don't set $streaming, defaults to both)
                }
                // (else don't set $streaming, defaults to both)
            }
            else if (null != px.feature_datastoreQuery) {
                if (haveInput) throw new RuntimeException("Currently only support one input block");
                haveInput = true;
                if (FeatureName.association == px.feature_datastoreQuery.featureName) {
                    job.inputCollection = "feature.association";
                }
                else if (FeatureName.entity == px.feature_datastoreQuery.featureName) {
                    job.inputCollection = "feature.entity";
                }
                else if (FeatureName.temporal == px.feature_datastoreQuery.featureName) {
                    job.inputCollection = "feature.temporal";
                }
                query = handleCommonInFields(px.feature_datastoreQuery.query, px.feature_datastoreQuery.fields, px.feature_datastoreQuery.tmin, px.feature_datastoreQuery.tmax, null, null);
            }
            else if (null != px.extraInputSettings) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                handleGroupOverride(px.extraInputSettings.groupOverrideList, px.extraInputSettings.groupOverrideRegex, job, in);
                if (null != px.extraInputSettings.debugLimit) {
                    query.put("$limit", px.extraInputSettings.debugLimit);
                }
                if (null != px.extraInputSettings.docsPerSplitOverride) {
                    query.put("$docsPerSplit", px.extraInputSettings.docsPerSplitOverride);
                }
                if (null != px.extraInputSettings.numSplitsOverride) {
                    query.put("$splits", px.extraInputSettings.numSplitsOverride);
                }
            }
            else if (null != px.scheduler) {
                if (null != scorecard.scheduler) throw new RuntimeException("Only support one scheduler");
                scorecard.scheduler = px.scheduler;
                boolean isDisabled = false;
                if (null == px.scheduler.frequency) {
                    px.scheduler.frequency = CustomScheduler.FrequencyMode.disabled;
                }
                if (CustomScheduler.FrequencyMode.once_only == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.NONE;
                }
                else if (CustomScheduler.FrequencyMode.hourly == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.HOURLY;
                }
                else if (CustomScheduler.FrequencyMode.daily == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.DAILY;
                }
                else if (CustomScheduler.FrequencyMode.weekly == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.WEEKLY;
                }
                else if (CustomScheduler.FrequencyMode.monthly == px.scheduler.frequency) {
                    job.scheduleFreq = SCHEDULE_FREQUENCY.MONTHLY;
                }
                else if (CustomScheduler.FrequencyMode.disabled == px.scheduler.frequency) {
                    isDisabled = true;
                    job.scheduleFreq = SCHEDULE_FREQUENCY.NONE;
                    job.nextRunTime = CustomApiUtils.DONT_RUN_TIME;
                }
                else if (CustomScheduler.FrequencyMode.ondemand == px.scheduler.frequency) {
                    isDisabled = true;
                    job.nextRunTime = CustomApiUtils.DONT_RUN_TIME;
                    // 01-01-2099 in milliseconds! Will use this constant to mean "don't run" - CustomHandler.DONT_RUN_TIME
                    //TODO (INF-2865): to implement
                    throw new RuntimeException("'OnDemand' not yet supported");
                }
                if (!isDisabled) {
                    if (null != scorecard.scheduler.runDate) {
                        Date d = InfiniteHadoopUtils.dateStringFromObject(scorecard.scheduler.runDate, true);
                        if (null != d) {
                            // Special case: if once_only and runDate < now then update it
                            if (CustomScheduler.FrequencyMode.once_only == px.scheduler.frequency) {
                                long now = new Date().getTime();
                                if (d.getTime() < now) {
                                    job.nextRunTime = now;
                                }
                                else {
                                    job.nextRunTime = d.getTime();
                                }
                            }
                            else { // (otherwise retain it so that it gets used to determine the next time)
                                job.nextRunTime = d.getTime();
                            }
                        }
                    }
                    else if (Long.MAX_VALUE == job.nextRunTime) { // (ie not set => field left at its default)
                        job.nextRunTime = new Date().getTime();
                    }
                    if ((null == job.firstSchedule) || (CustomApiUtils.DONT_RUN_TIME == job.firstSchedule.getTime())) {
                        // (ie if firstSchedule not set then set it)
                        job.firstSchedule = new Date(job.nextRunTime);
                    }
                } //(else already set)
                if (null != scorecard.scheduler.autoDependency) {
                    //(will eventually automatically generate a dependency on any custom input tables)
                    //TODO (INF-2865): to implement
                    throw new RuntimeException("'Automatic dependencies' not yet supported");
                }
                if (null != scorecard.scheduler.dependencies) {
                    try {
                        job.jobDependencies = new HashSet<ObjectId>(scorecard.scheduler.dependencies.size());
                        for (String depId : scorecard.scheduler.dependencies) {
                            job.jobDependencies.add(new ObjectId(depId));
                        }
                    }
                    catch (Exception e) {
                        throw new RuntimeException("Custom Scheduler Dependencies: invalid Dependency in " + Arrays.toString(scorecard.scheduler.dependencies.toArray()));
                    }
                }
                // First time through, can overwrite some of the fields:
                if ((null == in.getHarvestStatus()) || (null == in.getHarvestStatus().getHarvest_status())) {
                    job.timesRan = 0; // (if we're setting the initial override, then need to ensure that it's unset after running)
                    job.timesFailed = 0;
                    // Unset any tmin/tmax/srctags fields if set to " "s
                    String tminOver = px.scheduler.tmin_initialOverride;
                    String tmaxOver = px.scheduler.tmax_initialOverride;
                    String srctagsOver = px.scheduler.srcTags_initialOverride;
                    if (null != tminOver) {
                        tminOver = tminOver.trim(); // (hence will be ignored)
                        if (tminOver.isEmpty()) {
                            query.remove("$tmin");
                        }
                    }
                    if (null != tmaxOver) {
                        tmaxOver = tmaxOver.trim();
                        if (tmaxOver.isEmpty()) {
                            query.remove("$tmax");
                        }
                    }
                    if (null != srctagsOver) {
                        srctagsOver = srctagsOver.trim();
                        if (srctagsOver.isEmpty()) {
                            query.remove("$srctags");
                        }
                    }
                    //TESTED (custom_scheduler_test_2, custom_scheduler_test_1)
                    if (null == px.scheduler.query_initialOverride) { // easy, just override fields from existing query
                        query = handleCommonInFields(null, null, tminOver, tmaxOver, srctagsOver, query);
                    } //TESTED (custom_scheduler_test_1)
                    else { // one extra complication ... if tmin/tmax/srctags _aren't_ overridden then use originals instead
                        if (null == tminOver) tminOver = query.getString("$tmin");
                        if (null == tmaxOver) tmaxOver = query.getString("$tmax");
                        if (null == srctagsOver) srctagsOver = query.getString("$srctags");
                        query = handleCommonInFields(px.scheduler.query_initialOverride, null, tminOver, tmaxOver, srctagsOver, null);
                    } //TESTED (custom_scheduler_test_2 - some fields override (+ve or -ve), some pulled from original)
                } //TESTED (that first time through harvest|harvest.status==null, subsequently not)
            }
            else if (null != px.artefacts) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                if (null != px.artefacts.mainJar) {
                    String jar = null;
                    // A few options:
                    // $infinite/.../<id> or <id> or a URL
                    try {
                        jar = new ObjectId(px.artefacts.mainJar).toString();
                        jar = "$infinite/share/get/" + jar;
                    }
                    catch (Exception e) { } // fall through to...
                    if (null == jar) {
                        jar = px.artefacts.mainJar;
                    }
                    job.jarURL = jar;
                }
                if (null != px.artefacts.extraJars) {
                    for (String jarId : px.artefacts.extraJars) {
                        caches.add(jarId);
                    }
                }
                if (null != px.artefacts.joinTables) {
                    for (String shareId : px.artefacts.joinTables) {
                        caches.add(shareId);
                    }
                }
                if (null != px.artefacts.selfJoin) {
                    job.selfMerge = px.artefacts.selfJoin;
                }
            }
            else if (null != px.mapper) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                if (null != scorecard.scriptingEngine) throw new RuntimeException("Can't have a scriptingEngine and mapper");
                if (null != scorecard.hadoopEngine) throw new RuntimeException("Can't have a hadoopEngine and mapper");
                if (null != scorecard.mapper) throw new RuntimeException("Currently only support one mapper");
                scorecard.mapper = px.mapper;
                job.mapper = px.mapper.mapperClass;
                if (null != px.mapper.mapperKeyClass) {
                    query.put("$mapper_key_class", px.mapper.mapperKeyClass);
                }
                if (null != px.mapper.mapperValueClass) {
                    query.put("$mapper_value_class", px.mapper.mapperValueClass);
                }
            }
            else if (null != px.combiner) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                if (null != scorecard.scriptingEngine) throw new RuntimeException("Can't have a scriptingEngine and combiner");
                if (null != scorecard.hadoopEngine) throw new RuntimeException("Can't have a hadoopEngine and combiner");
                if (null != scorecard.combiner) throw new RuntimeException("Currently only support one combiner");
                scorecard.combiner = px.combiner;
                job.combiner = px.combiner.combinerClass;
            }
            else if (null != px.reducer) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                if (null != scorecard.scriptingEngine) throw new RuntimeException("Can't have a scriptingEngine and reducer");
                if (null != scorecard.hadoopEngine) throw new RuntimeException("Can't have a hadoopEngine and reducer");
                if (null != scorecard.reducer) throw new RuntimeException("Currently only support one reducer");
                scorecard.reducer = px.reducer;
                job.reducer = px.reducer.reducerClass;
                if (null != px.reducer.numReducers) {
                    query.put("$reducers", px.reducer.numReducers);
                }
                if (null != px.reducer.outputKeyClass) {
                    job.outputKey = px.reducer.outputKeyClass;
                }
                if (null != px.reducer.outputValueClass) {
                    job.outputValue = px.reducer.outputValueClass;
                }
            }
            else if (null != px.hadoopEngine) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                if (null != scorecard.scriptingEngine) throw new RuntimeException("Only one of: scriptingEngine, hadoopEngine");
                if (null != scorecard.hadoopEngine) throw new RuntimeException("Only support one hadoopEngine");
                if (null != scorecard.mapper) throw new RuntimeException("Can't have a hadoopEngine and mapper");
                if (null != scorecard.combiner) throw new RuntimeException("Can't have a hadoopEngine and combiner");
                if (null != scorecard.reducer) throw new RuntimeException("Can't have a hadoopEngine and reducer");
                scorecard.hadoopEngine = px.hadoopEngine;
                if (null != px.hadoopEngine.mainJar) {
                    String jar = null;
                    // A few options:
                    // $infinite/.../<id> or <id> or a URL
                    try {
                        jar = new ObjectId(px.hadoopEngine.mainJar).toString();
                        jar = "$infinite/share/get/" + jar;
                    }
                    catch (Exception e) { } // fall through to...
                    if (null == jar) {
                        jar = px.hadoopEngine.mainJar;
                    }
                    job.jarURL = jar;
                }
                job.mapper = px.hadoopEngine.mapperClass;
                if (null != px.hadoopEngine.combinerClass) {
                    job.combiner = px.hadoopEngine.combinerClass;
                }
                else {
                    job.combiner = "none";
                }
                if (null != px.hadoopEngine.reducerClass) {
                    job.reducer = px.hadoopEngine.reducerClass;
                }
                else {
                    job.reducer = "none";
                }
                job.outputKey = px.hadoopEngine.outputKeyClass;
                job.outputValue = px.hadoopEngine.outputValueClass;
                if (null != px.hadoopEngine.mapperKeyClass) {
                    query.put("$mapper_key_class", px.hadoopEngine.mapperKeyClass);
                }
                if (null != px.hadoopEngine.mapperValueClass) {
                    query.put("$mapper_value_class", px.hadoopEngine.mapperValueClass);
                }
                if (null != px.hadoopEngine.numReducers) {
                    query.put("$reducers", px.hadoopEngine.numReducers);
                }
                if (null != px.hadoopEngine.configuration) {
                    if (px.hadoopEngine.configuration.trim().startsWith("{")) {
                        argsJson = (BasicDBObject) com.mongodb.util.JSON.parse(px.hadoopEngine.configuration);
                        if (null != px.hadoopEngine.configParams)
                            for (Map.Entry<String, String> param : px.hadoopEngine.configParams.entrySet()) {
                                argsJson.put(param.getKey(), param.getValue());
                            }
                    }
                    else {
                        args = new StringBuffer(px.hadoopEngine.configuration);
                        if (null != px.hadoopEngine.configParams) {
                            throw new RuntimeException("Can only specify hadoopEngine.configParams when hadoopEngine.configuration is in JSON format");
                        }
                    }
                }
                else {
                    args = new StringBuffer(); // (ie just "")
                }
            }
            else if (null != px.scriptingEngine) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                if (null != scorecard.hadoopEngine) throw new RuntimeException("Only one of: scriptingEngine, hadoopEngine");
                if (null != scorecard.scriptingEngine) throw new RuntimeException("Only support one scriptingEngine");
                if (null != scorecard.mapper) throw new RuntimeException("Can't have a scriptingEngine and mapper");
                if (null != scorecard.combiner) throw new RuntimeException("Can't have a scriptingEngine and combiner");
                if (null != scorecard.reducer) throw new RuntimeException("Can't have a scriptingEngine and reducer");
                scorecard.scriptingEngine = px.scriptingEngine;
                //TODO (INF-2865): handle jython scripting engine (mainJar and also the classes below)
                job.jarURL = InfiniteHadoopUtils.BUILT_IN_JOB_PATH;
                args = new StringBuffer();
                if (null != px.scriptingEngine.numReducers) {
                    query.put("$reducers", px.scriptingEngine.numReducers);
                }
                if (null != px.scriptingEngine.memoryOptimized) {
                    args.append("_memoryOptimization = ").append(px.scriptingEngine.memoryOptimized).append(";\n\n");
                }
                if ((null != px.scriptingEngine.globalScript) && !px.scriptingEngine.globalScript.isEmpty()) {
                    args.append(px.scriptingEngine.globalScript).append("\n\n");
                }
                job.mapper = "com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptMapper";
                if ((null != px.scriptingEngine.mapScript) && !px.scriptingEngine.mapScript.isEmpty()) {
                    args.append(px.scriptingEngine.mapScript).append("\n\n");
                }
                if ((null != px.scriptingEngine.combineScript) && !px.scriptingEngine.combineScript.isEmpty()) {
                    job.combiner = "com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptCombiner";
                    args.append(px.scriptingEngine.combineScript).append("\n\n");
                }
                else {
                    job.combiner = "#com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptCombiner";
                }
                if ((null != px.scriptingEngine.reduceScript) && !px.scriptingEngine.reduceScript.isEmpty()) {
                    job.reducer = "com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptReducer";
                    args.append(px.scriptingEngine.reduceScript).append("\n\n");
                }
                else {
                    job.reducer = "#com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$JavascriptReducer";
                }
                job.outputKey = "com.mongodb.hadoop.io.BSONWritable";
                job.outputValue = "com.mongodb.hadoop.io.BSONWritable";
            }
            else if (null != px.tableOutput) {
                if (!haveInput) throw new RuntimeException("Job must start with an input block");
                if (null != scorecard.tableOutput) throw new RuntimeException("Only support one tableOutput");
                scorecard.tableOutput = px.tableOutput;
                if (null != px.tableOutput.ageOut_days) {
                    job.appendAgeOutInDays = px.tableOutput.ageOut_days;
                }
                if (null != px.tableOutput.globalObjectLimit) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("limit", px.tableOutput.globalObjectLimit);
                    queryOutput.put("limitAllData", true);
                }
                if (null != px.tableOutput.perCycleObjectLimit) {
                    if (null != px.tableOutput.globalObjectLimit) {
                        throw new RuntimeException("Currently can support only one of: globalObjectLimit, perCycleObjectLimit in tableOutput");
                    }
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("limit", px.tableOutput.perCycleObjectLimit);
                    queryOutput.put("limitAllData", false);
                }
                if (null != px.tableOutput.sortDirection) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("sortDirection", px.tableOutput.sortDirection);
                }
                if (null != px.tableOutput.sortField) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("sortField", px.tableOutput.sortField);
                }
                if (null != px.tableOutput.appendMode) {
                    if (AppendMode.append_merge == px.tableOutput.appendMode) {
                        job.appendResults = true;
                        job.incrementalMode = false;
                    }
                    else if (AppendMode.append_reduce == px.tableOutput.appendMode) {
                        job.appendResults = true;
                        job.incrementalMode = true;
                    }
                    //(else leave alone)
                }
                if (null != px.tableOutput.dataStoreIndexes) {
                    if (null == queryOutput) {
                        queryOutput = new BasicDBObject();
                        query.put("$output", queryOutput);
                    }
                    queryOutput.put("indexed", com.mongodb.util.JSON.parse(px.tableOutput.dataStoreIndexes));
                }
                if (!testNotCreateMode) {
                    if (null != px.tableOutput.indexed) {
                        if (px.tableOutput.indexed) {
                            if (null == queryOutput) {
                                queryOutput = new BasicDBObject();
                                query.put("$output", queryOutput);
                            }
                            queryOutput.put("indexMode", "custom");
                        }
                    }
                }
                if (null != px.tableOutput.postFixName) {
                    throw new RuntimeException("Can't currently specify a postFix for job names - job name == source key");
                }
            } //(don't allow any other output types in test mode?)
        } //(end loop over pipeline elements)

    completeJob(job, query, caches, (null != args) ? args.toString() : null, argsJson, scorecard);
    out.add(job);
}
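
The conversion above relies on BasicDBObject.getString(key, def) to read optional string fields with a fallback value, e.g. fileObj.getString("url", "will_error_later"). The snippet below is a minimal standalone sketch, not part of the Ikanow source, that only illustrates this defaulting behaviour; the field names ("url", "$tmin") are illustrative and it assumes the legacy mongo-java-driver is on the classpath.

import com.mongodb.BasicDBObject;

public class GetStringWithDefaultExample {
    public static void main(String[] args) {
        BasicDBObject fileObj = new BasicDBObject("url", "hdfs:///data/input");

        // Key present: the stored value is returned
        String url = fileObj.getString("url", "will_error_later");

        // Key absent: the supplied default is returned instead of null
        String tmin = fileObj.getString("$tmin", "");

        System.out.println(url);  // prints: hdfs:///data/input
        System.out.println(tmin); // prints an empty line (empty-string default)
    }
}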