List of usage examples for java.util.Queue.peek()
E peek();
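Queue.peek() retrieves, but does not remove, the head of the queue, and returns null when the queue is empty (unlike element(), which throws NoSuchElementException). A minimal self-contained sketch of that behavior; the PeekDemo class name is illustrative only:

import java.util.LinkedList;
import java.util.Queue;

public class PeekDemo {
  public static void main(String[] args) {
    Queue<String> queue = new LinkedList<String>();
    System.out.println(queue.peek());  // null: the queue is empty
    queue.offer("first");
    queue.offer("second");
    System.out.println(queue.peek());  // "first": the head is inspected but not removed
    System.out.println(queue.size());  // 2: both elements are still queued
  }
}

The examples below use peek() in two recurring ways: as a loop condition for breadth-first traversal (peek() != null as an emptiness check) and as a way to inspect the weakest element of a bounded PriorityQueue.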
From source file: org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.java

/**
 * Create Hive splits based on CombineFileSplit.
 */
private InputSplit[] getCombineSplits(JobConf job, int numSplits,
    Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
  init(job);
  Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
  Map<String, Operator<? extends OperatorDesc>> aliasToWork = mrwork.getAliasToWork();
  CombineFileInputFormatShim combine = ShimLoader.getHadoopShims().getCombineFileInputFormat();

  InputSplit[] splits = null;
  if (combine == null) {
    splits = super.getSplits(job, numSplits);
    return splits;
  }

  if (combine.getInputPathsShim(job).length == 0) {
    throw new IOException("No input paths specified in job");
  }
  ArrayList<InputSplit> result = new ArrayList<InputSplit>();

  // Combine splits only from the same table and the same partition. Do not combine splits
  // from multiple tables or multiple partitions.
  Path[] paths = combine.getInputPathsShim(job);

  List<Path> inpDirs = new ArrayList<Path>();
  List<Path> inpFiles = new ArrayList<Path>();
  Map<CombinePathInputFormat, CombineFilter> poolMap =
      new HashMap<CombinePathInputFormat, CombineFilter>();
  Set<Path> poolSet = new HashSet<Path>();

  for (Path path : paths) {
    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
    TableDesc tableDesc = part.getTableDesc();
    if ((tableDesc != null) && tableDesc.isNonNative()) {
      return super.getSplits(job, numSplits);
    }

    // Use HiveInputFormat if any of the paths is not splittable
    Class inputFormatClass = part.getInputFileFormatClass();
    String inputFormatClassName = inputFormatClass.getName();
    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
    String deserializerClassName = null;
    try {
      deserializerClassName = part.getDeserializer(job).getClass().getName();
    } catch (Exception e) {
      // ignore
    }

    FileSystem inpFs = path.getFileSystem(job);

    // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
    // we use a configuration variable for the same
    if (this.mrwork != null && !this.mrwork.getHadoopSupportsSplittable()) {
      // The following code should be removed, once
      // https://issues.apache.org/jira/browse/MAPREDUCE-1597 is fixed.
      // Hadoop does not handle non-splittable files correctly for CombineFileInputFormat,
      // so don't use CombineFileInputFormat for non-splittable files,
      // i.e., don't combine if the input format is a TextInputFormat and compression is turned on.
      if (inputFormat instanceof TextInputFormat) {
        Queue<Path> dirs = new LinkedList<Path>();
        FileStatus fStats = inpFs.getFileStatus(path);

        // If path is a directory
        if (fStats.isDir()) {
          dirs.offer(path);
        } else if ((new CompressionCodecFactory(job)).getCodec(path) != null) {
          // if a compression codec is set, use HiveInputFormat.getSplits (don't combine)
          splits = super.getSplits(job, numSplits);
          return splits;
        }

        while (dirs.peek() != null) {
          Path tstPath = dirs.remove();
          FileStatus[] fStatus = inpFs.listStatus(tstPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
          for (int idx = 0; idx < fStatus.length; idx++) {
            if (fStatus[idx].isDir()) {
              dirs.offer(fStatus[idx].getPath());
            } else if ((new CompressionCodecFactory(job)).getCodec(fStatus[idx].getPath()) != null) {
              // if a compression codec is set, use HiveInputFormat.getSplits (don't combine)
              splits = super.getSplits(job, numSplits);
              return splits;
            }
          }
        }
      }
    }

    // don't combine if the input format is a SymlinkTextInputFormat
    if (inputFormat instanceof SymlinkTextInputFormat) {
      splits = super.getSplits(job, numSplits);
      return splits;
    }

    Path filterPath = path;

    // Does a pool exist for this path already
    CombineFilter f = null;
    List<Operator<? extends OperatorDesc>> opList = null;

    if (!mrwork.isMapperCannotSpanPartns()) {
      // If the mapper can span partitions, make sure a split does not contain multiple
      // opList + inputFormatClassName + deserializerClassName combinations.
      // This is done using the Map of CombinePathInputFormat to PathFilter.
      opList = HiveFileFormatUtils.doGetWorksFromPath(pathToAliases, aliasToWork, filterPath);
      CombinePathInputFormat combinePathInputFormat =
          new CombinePathInputFormat(opList, inputFormatClassName, deserializerClassName);
      f = poolMap.get(combinePathInputFormat);
      if (f == null) {
        f = new CombineFilter(filterPath);
        LOG.info("CombineHiveInputSplit creating pool for " + path + "; using filter path " + filterPath);
        combine.createPool(job, f);
        poolMap.put(combinePathInputFormat, f);
      } else {
        LOG.info("CombineHiveInputSplit: pool is already created for " + path
            + "; using filter path " + filterPath);
        f.addPath(filterPath);
      }
    } else {
      // In the case of tablesample, the input paths are pointing to files rather than directories.
      // We need to get the parent directory as the filtering path so that all files in the same
      // parent directory will be grouped into one pool but not files from different parent
      // directories. This guarantees that a split will combine all files in the same partition
      // but won't cross multiple partitions if the user has asked so.
      if (!path.getFileSystem(job).getFileStatus(path).isDir()) { // path is not a directory
        filterPath = path.getParent();
        inpFiles.add(path);
        poolSet.add(filterPath);
      } else {
        inpDirs.add(path);
      }
    }
  }

  // Processing directories
  List<CombineFileSplit> iss = new ArrayList<CombineFileSplit>();
  if (!mrwork.isMapperCannotSpanPartns()) {
    // The mapper can span partitions: combine into as few as one split, subject to the
    // PathFilters set using combine.createPool.
    iss = Arrays.asList(combine.getSplits(job, 1));
  } else {
    for (Path path : inpDirs) {
      processPaths(job, combine, iss, path);
    }
    if (inpFiles.size() > 0) {
      // Processing files
      for (Path filterPath : poolSet) {
        combine.createPool(job, new CombineFilter(filterPath));
      }
      processPaths(job, combine, iss, inpFiles.toArray(new Path[0]));
    }
  }

  if (mrwork.getNameToSplitSample() != null && !mrwork.getNameToSplitSample().isEmpty()) {
    iss = sampleSplits(iss);
  }

  for (CombineFileSplit is : iss) {
    CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is, pathToPartitionInfo);
    result.add(csplit);
  }
  LOG.info("number of splits " + result.size());
  return result.toArray(new CombineHiveInputSplit[result.size()]);
}
From source file: org.apache.hadoop.hive.ql.MultiDriver.java

private int multipreoptimizetest() throws CommandNeedRetryException {
  int i;
  PerfLogger perfLogger = PerfLogger.getPerfLogger();
  for (i = 0; i < cmds.size(); i++) {
    TaskFactory.resetId();
    ParseContext pCtx = multiPctx.get(i);
    // conf = (HiveConf) confs.get(i);
    conf = pCtx.getConf();
    ctx = pCtx.getContext();

    LOG.info("Before MultidoPhase2forTest Optree:\n" + Operator.toString(pCtx.getTopOps().values()));

    // do Optimizer gen MR task
    SemanticAnalyzer sem;
    try {
      sem = new SemanticAnalyzer(conf);
      sem.MultidoPhase2forTest(pCtx);
      sem.validate();

      plan = new QueryPlan(cmds.get(i), sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN));

      if (false) {
        String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml";
        LOG.info("query plan = " + queryPlanFileName);
        queryPlanFileName = new Path(queryPlanFileName).toUri().getPath();
        // serialize the queryPlan
        FileOutputStream fos = new FileOutputStream(queryPlanFileName);
        Utilities.serializeObject(plan, fos);
        fos.close();
      }

      // initialize FetchTask right here
      if (plan.getFetchTask() != null) {
        plan.getFetchTask().initialize(conf, plan, null);
      }

      // get the output schema
      schema = schemas.get(i);
    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }

    boolean requireLock = false;
    boolean ckLock = checkLockManager();
    if (ckLock) {
      boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
      if (lockOnlyMapred) {
        Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
        taskQueue.addAll(plan.getRootTasks());
        while (taskQueue.peek() != null) {
          Task<? extends Serializable> tsk = taskQueue.remove();
          requireLock = requireLock || tsk.requireLock();
          if (requireLock) {
            break;
          }
          if (tsk instanceof ConditionalTask) {
            taskQueue.addAll(((ConditionalTask) tsk).getListTasks());
          }
          if (tsk.getChildTasks() != null) {
            taskQueue.addAll(tsk.getChildTasks());
          }
          // Does not add the backup task here, because the backup task should be the same
          // type as the original task.
        }
      } else {
        requireLock = true;
      }
    }

    int ret;
    if (requireLock) {
      ret = acquireReadWriteLocks();
      if (ret != 0) {
        releaseLocks(ctx.getHiveLocks());
        // return new CommandProcessorResponse(ret, errorMessage, SQLState);
      }
    }

    ret = execute();
    if (ret != 0) {
      // if needRequireLock is false, the release here will do nothing because there is no lock
      releaseLocks(ctx.getHiveLocks());
      // return new CommandProcessorResponse(ret, errorMessage, SQLState);
    }
    // if needRequireLock is false, the release here will do nothing because there is no lock
    releaseLocks(ctx.getHiveLocks());

    // test output
    SessionState ss = SessionState.get();
    PrintStream out = ss.out;
    ArrayList<String> res = new ArrayList<String>();
    LOG.info("Output the result of query ID(" + i + "):");
    printHeader(this, out);

    int counter = 0;
    try {
      while (this.getResults(res)) {
        for (String r : res) {
          out.println(r);
        }
        counter += res.size();
        res.clear();
        if (out.checkError()) {
          break;
        }
      }
    } catch (IOException e) {
      console.printError("Failed with exception " + e.getClass().getName() + ":" + e.getMessage(),
          "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      ret = 1;
    }
  }
  return 0;
}
From source file: org.apache.hadoop.hive.ql.MultiDriver.java

private CommandProcessorResponse runInternal(ArrayList<Pair<String, Configuration>> multiCmds)
    throws CommandNeedRetryException {
  errorMessage = null;
  SQLState = null;
  downstreamError = null;

  if (!validateConfVariables()) {
    return new CommandProcessorResponse(12, errorMessage, SQLState);
  }

  // Reset the perf logger
  PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
  perfLogger.PerfLogBegin(LOG, PerfLogger.MULTIDRIVER_RUN);
  perfLogger.PerfLogBegin(LOG, PerfLogger.TIME_TO_SUBMIT);

  // createcachetable(multiCmds);

  int ret;
  synchronized (compileMonitor) {
    ret = multiCompile(multiCmds);
  }
  if (ret != 0) {
    for (int key = 0; key < multiPctx.size(); key++) {
      Context ctx = multiPctx.get(key).getContext();
      releaseLocks(ctx.getHiveLocks());
    }
    return new CommandProcessorResponse(ret, errorMessage, SQLState);
  }

  boolean requireLock = false;
  boolean ckLock = checkLockManager();
  if (ckLock) {
    boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
    if (lockOnlyMapred) {
      Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
      taskQueue.addAll(plan.getRootTasks());
      while (taskQueue.peek() != null) {
        Task<? extends Serializable> tsk = taskQueue.remove();
        requireLock = requireLock || tsk.requireLock();
        if (requireLock) {
          break;
        }
        if (tsk instanceof ConditionalTask) {
          taskQueue.addAll(((ConditionalTask) tsk).getListTasks());
        }
        if (tsk.getChildTasks() != null) {
          taskQueue.addAll(tsk.getChildTasks());
        }
        // Does not add the backup task here, because the backup task should be the same
        // type as the original task.
      }
    } else {
      requireLock = true;
    }
  }

  if (requireLock) {
    ret = acquireReadWriteLocks();
    if (ret != 0) {
      releaseLocks(ctx.getHiveLocks());
      // return new CommandProcessorResponse(ret, errorMessage, SQLState);
    }
  }

  boolean isexplain = ctx.getExplain();
  // if (isexplain) {
  //   multiOutputexplain();
  // } else {  // reserved function

  ret = multiExecute();
  if (ret != 0) {
    // if needRequireLock is false, the release here will do nothing because there is no lock
    releaseLocks(ctx.getHiveLocks());
    return new CommandProcessorResponse(ret, errorMessage, SQLState);
  }
  multiOutputResult();

  // if needRequireLock is false, the release here will do nothing because there is no lock
  releaseLocks(ctx.getHiveLocks());

  for (int key = 0; key < multiPctx.size(); key++) {
    Context ctx = multiPctx.get(key).getContext();
    releaseLocks(ctx.getHiveLocks());
  }
  multiPctx.clear();

  perfLogger.PerfLogEnd(LOG, PerfLogger.MULTIDRIVER_RUN);
  perfLogger.close(LOG, plan);

  return new CommandProcessorResponse(ret);
}
From source file: org.apache.hadoop.hive.ql.MultiDriver.java

private int createcachetable(ArrayList<Pair<String, Configuration>> multiCmds) throws CommandNeedRetryException {
  int ret;
  synchronized (compileMonitor) {
    ret = CreateTableCompile(multiCmds, true);
  }

  boolean requireLock = false;
  boolean ckLock = checkLockManager();
  if (ckLock) {
    boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
    if (lockOnlyMapred) {
      Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
      taskQueue.addAll(plan.getRootTasks());
      while (taskQueue.peek() != null) {
        Task<? extends Serializable> tsk = taskQueue.remove();
        requireLock = requireLock || tsk.requireLock();
        if (requireLock) {
          break;
        }
        if (tsk instanceof ConditionalTask) {
          taskQueue.addAll(((ConditionalTask) tsk).getListTasks());
        }
        if (tsk.getChildTasks() != null) {
          taskQueue.addAll(tsk.getChildTasks());
        }
        // Does not add the backup task here, because the backup task should be the same
        // type as the original task.
      }
    } else {
      requireLock = true;
    }
  }

  if (requireLock) {
    ret = acquireReadWriteLocks();
    if (ret != 0) {
      releaseLocks(ctx.getHiveLocks());
      // return new CommandProcessorResponse(ret, errorMessage, SQLState);
    }
  }

  ret = multiExecute();
  return ret;
}
From source file: org.apache.hadoop.hive.ql.MultiDriver_BAK.java

private CommandProcessorResponse runInternal(ArrayList<Pair<String, Configuration>> multiCmds)
    throws CommandNeedRetryException {
  errorMessage = null;
  SQLState = null;
  downstreamError = null;

  if (!validateConfVariables()) {
    return new CommandProcessorResponse(12, errorMessage, SQLState);
  }

  // Reset the perf logger
  PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
  perfLogger.PerfLogBegin(LOG, PerfLogger.MULTIDRIVER_RUN);
  perfLogger.PerfLogBegin(LOG, PerfLogger.TIME_TO_SUBMIT);

  int ret;
  synchronized (compileMonitor) {
    ret = multiCompile(multiCmds);
  }
  if (ret != 0) {
    for (int key = 0; key < multiPctx.size(); key++) {
      Context ctx = multiPctx.get(key).getContext();
      releaseLocks(ctx.getHiveLocks());
    }
    return new CommandProcessorResponse(ret, errorMessage, SQLState);
  }

  boolean requireLock = false;
  boolean ckLock = checkLockManager();
  if (ckLock) {
    boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
    if (lockOnlyMapred) {
      Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
      taskQueue.addAll(plan.getRootTasks());
      while (taskQueue.peek() != null) {
        Task<? extends Serializable> tsk = taskQueue.remove();
        requireLock = requireLock || tsk.requireLock();
        if (requireLock) {
          break;
        }
        if (tsk instanceof ConditionalTask) {
          taskQueue.addAll(((ConditionalTask) tsk).getListTasks());
        }
        if (tsk.getChildTasks() != null) {
          taskQueue.addAll(tsk.getChildTasks());
        }
        // Does not add the backup task here, because the backup task should be the same
        // type as the original task.
      }
    } else {
      requireLock = true;
    }
  }

  if (requireLock) {
    ret = acquireReadWriteLocks();
    if (ret != 0) {
      releaseLocks(ctx.getHiveLocks());
      // return new CommandProcessorResponse(ret, errorMessage, SQLState);
    }
  }

  boolean isexplain = ctx.getExplain();
  // if (isexplain) {
  //   multiOutputexplain();
  // } else {  // reserved function

  ret = multiExecute();
  if (ret != 0) {
    // if needRequireLock is false, the release here will do nothing because there is no lock
    releaseLocks(ctx.getHiveLocks());
    return new CommandProcessorResponse(ret, errorMessage, SQLState);
  }
  multiOutputResult();

  // if needRequireLock is false, the release here will do nothing because there is no lock
  releaseLocks(ctx.getHiveLocks());

  for (int key = 0; key < multiPctx.size(); key++) {
    Context ctx = multiPctx.get(key).getContext();
    releaseLocks(ctx.getHiveLocks());
  }
  multiPctx.clear();

  perfLogger.PerfLogEnd(LOG, PerfLogger.MULTIDRIVER_RUN);
  perfLogger.close(LOG, plan);

  return new CommandProcessorResponse(ret);
}
From source file: org.apache.hadoop.hive.ql.QueryPlan.java

/**
 * generate the operator graph and operator list for the given task based on
 * the operators corresponding to that task.
 *
 * @param task
 *          api.Task which needs its operator graph populated
 * @param topOps
 *          the set of top operators from which the operator graph for the
 *          task is hanging
 */
private void populateOperatorGraph(org.apache.hadoop.hive.ql.plan.api.Task task,
    Collection<Operator<? extends OperatorDesc>> topOps) {

  task.setOperatorGraph(new org.apache.hadoop.hive.ql.plan.api.Graph());
  task.getOperatorGraph().setNodeType(NodeType.OPERATOR);

  Queue<Operator<? extends OperatorDesc>> opsToVisit = new LinkedList<Operator<? extends OperatorDesc>>();
  Set<Operator<? extends OperatorDesc>> opsVisited = new HashSet<Operator<? extends OperatorDesc>>();

  opsToVisit.addAll(topOps);
  while (opsToVisit.peek() != null) {
    Operator<? extends OperatorDesc> op = opsToVisit.remove();
    opsVisited.add(op);
    // populate the operator
    org.apache.hadoop.hive.ql.plan.api.Operator operator =
        new org.apache.hadoop.hive.ql.plan.api.Operator();
    operator.setOperatorId(op.getOperatorId());
    operator.setOperatorType(op.getType());
    task.addToOperatorList(operator);
    // done processing the operator
    if (op.getChildOperators() != null) {
      org.apache.hadoop.hive.ql.plan.api.Adjacency entry =
          new org.apache.hadoop.hive.ql.plan.api.Adjacency();
      entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE);
      entry.setNode(op.getOperatorId());
      for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
        entry.addToChildren(childOp.getOperatorId());
        if (!opsVisited.contains(childOp)) {
          opsToVisit.add(childOp);
        }
      }
      task.getOperatorGraph().addToAdjacencyList(entry);
    }
  }
}
From source file: org.apache.hadoop.hive.ql.QueryPlan.java

/**
 * Extract all the counters from tasks and operators.
 */
private void extractCounters() throws IOException {
  Queue<Task<? extends Serializable>> tasksToVisit = new LinkedList<Task<? extends Serializable>>();
  Set<Task<? extends Serializable>> tasksVisited = new HashSet<Task<? extends Serializable>>();
  tasksToVisit.addAll(rootTasks);
  while (tasksToVisit.peek() != null) {
    Task<? extends Serializable> task = tasksToVisit.remove();
    tasksVisited.add(task);
    // add children to tasksToVisit
    if (task.getChildTasks() != null) {
      for (Task<? extends Serializable> childTask : task.getChildTasks()) {
        if (!tasksVisited.contains(childTask)) {
          tasksToVisit.add(childTask);
        }
      }
    }
    if (task.getId() == null) {
      continue;
    }
    if (started.contains(task.getId()) && done.contains(task.getId())) {
      continue;
    }

    // get the counters for the task
    counters.put(task.getId(), task.getCounters());

    // check if task is started
    if (task.started()) {
      started.add(task.getId());
    }
    if (task.done()) {
      done.add(task.getId());
    }
    if (task instanceof ExecDriver) {
      ExecDriver mrTask = (ExecDriver) task;
      if (mrTask.mapStarted()) {
        started.add(task.getId() + "_MAP");
      }
      if (mrTask.mapDone()) {
        done.add(task.getId() + "_MAP");
      }
      if (mrTask.hasReduce()) {
        if (mrTask.reduceStarted()) {
          started.add(task.getId() + "_REDUCE");
        }
        if (mrTask.reduceDone()) {
          done.add(task.getId() + "_REDUCE");
        }
      }
    } else if (task instanceof ConditionalTask) {
      ConditionalTask cTask = (ConditionalTask) task;
      for (Task<? extends Serializable> listTask : cTask.getListTasks()) {
        if (!tasksVisited.contains(listTask)) {
          tasksToVisit.add(listTask);
        }
      }
    }
  }
}
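The Hive examples above all use the same breadth-first traversal idiom: seed a LinkedList, loop while peek() returns non-null, remove the head, and enqueue any unvisited children. The null check works here because no null elements are ever offered, so peek() == null is equivalent to the queue being empty. A minimal sketch of the idiom in isolation; the Node type and visit method are illustrative and not part of the Hive code:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

class Node {
  final List<Node> children = new ArrayList<Node>();
}

class BfsSketch {
  static Set<Node> visit(List<Node> roots) {
    Queue<Node> toVisit = new LinkedList<Node>();
    Set<Node> visited = new HashSet<Node>();
    toVisit.addAll(roots);
    // peek() doubles as the emptiness check, mirroring the Hive examples
    while (toVisit.peek() != null) {
      Node node = toVisit.remove();
      visited.add(node);
      for (Node child : node.children) {
        if (!visited.contains(child)) {
          toVisit.add(child);
        }
      }
    }
    return visited;
  }
}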
From source file: org.apache.mahout.clustering.lda.LDAPrintTopics.java

private static void maybeEnqueue(Queue<Pair<String, Double>> q, String word, double score, int numWordsToPrint) {
  if (q.size() >= numWordsToPrint && score > q.peek().getSecond()) {
    q.poll();
  }
  if (q.size() < numWordsToPrint) {
    q.add(new Pair<String, Double>(word, score));
  }
}
From source file: org.apache.predictionio.examples.java.recommendations.tutorial1.Algorithm.java

private void setTopItemSimilarity(Map<Integer, Queue<IndexAndScore>> topItemSimilarity, Integer itemID1,
    Integer index2, double score, int capacity, Comparator<IndexAndScore> comparator) {
  Queue<IndexAndScore> queue = topItemSimilarity.get(itemID1);
  if (queue == null) {
    queue = new PriorityQueue<IndexAndScore>(capacity, comparator);
    topItemSimilarity.put(itemID1, queue);
  }
  IndexAndScore entry = new IndexAndScore(index2, score);
  if (queue.size() < capacity) {
    queue.add(entry);
  } else if (comparator.compare(queue.peek(), entry) < 0) {
    queue.poll();
    queue.add(entry);
  }
}
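The two examples above (Mahout and PredictionIO) use peek() on a PriorityQueue to maintain a bounded top-N collection: the head of the min-ordered queue is the weakest retained entry, so a new candidate is compared against peek() and, if better, replaces it via poll() and add(). A minimal sketch of that pattern under natural Double ordering; the TopScores class is hypothetical and not taken from either project:

import java.util.PriorityQueue;

class TopScores {
  private final int capacity;
  // Natural ordering: the head returned by peek() is the smallest retained score.
  private final PriorityQueue<Double> heap = new PriorityQueue<Double>();

  TopScores(int capacity) {
    this.capacity = capacity;
  }

  void offer(double score) {
    if (heap.size() < capacity) {
      heap.add(score);
    } else if (score > heap.peek()) {
      heap.poll();       // evict the current weakest score
      heap.add(score);   // keep only the best `capacity` scores
    }
  }
}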
From source file: org.kuali.rice.krms.service.impl.RuleEditorMaintainableImpl.java

public AgendaItemDefinition maintainAgendaItems(AgendaEditor agenda, String namePrefix, String nameSpace) {
  Queue<RuleDefinition.Builder> rules = new LinkedList<RuleDefinition.Builder>();
  for (RuleEditor rule : agenda.getRuleEditors().values()) {
    if (!rule.isDummy()) {
      rules.add(this.finRule(rule, namePrefix, nameSpace));
    }
  }

  AgendaItemDefinition.Builder rootItemBuilder = manageFirstItem(agenda);

  AgendaItemDefinition.Builder itemToDelete = null;
  AgendaItemDefinition.Builder itemBuilder = rootItemBuilder;
  while (rules.peek() != null) {
    itemBuilder.setRule(rules.poll());
    itemBuilder.setRuleId(itemBuilder.getRule().getId());
    if (rules.peek() != null) {
      if (itemBuilder.getWhenTrue() == null) {
        itemBuilder.setWhenTrue(AgendaItemDefinition.Builder.create(null, agenda.getId()));
      }
      itemBuilder = itemBuilder.getWhenTrue();
    } else {
      itemToDelete = itemBuilder.getWhenTrue();
      itemBuilder.setWhenTrue(null);
    }
  }

  return manageAgendaItems(agenda, rootItemBuilder, itemToDelete);
}