List of usage examples for org.apache.hadoop.mapreduce.Job#setSpeculativeExecution

public void setSpeculativeExecution(boolean speculativeExecution)

Turns speculative execution on or off for this job (both the map and reduce phases).
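Most of the examples below call setSpeculativeExecution(false) because their tasks have side effects (writing to Accumulo, to a single HDFS file, or to an external store), and a speculative duplicate task attempt would repeat that work. The following is a minimal sketch of that pattern, not taken from any of the examples: the driver class, mapper, and input/output paths are hypothetical placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SideEffectJobDriver { // hypothetical driver class

  // Hypothetical map-only task with a side effect per input record.
  public static class SideEffectMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws java.io.IOException, InterruptedException {
      // e.g. write to an external system here; emitting the line keeps the sketch runnable
      context.write(NullWritable.get(), value);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "side-effect-job");
    job.setJarByClass(SideEffectJobDriver.class);
    job.setMapperClass(SideEffectMapper.class);
    job.setNumReduceTasks(0); // map-only job

    // Disable speculative execution so no duplicate task attempt repeats the side effect.
    job.setSpeculativeExecution(false);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}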
From source file:org.apache.accumulo.examples.simple.mapreduce.NGramIngest.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(getClass().getName(), args);

  Job job = JobUtil.getJob(getConf());
  job.setJobName(getClass().getSimpleName());
  job.setJarByClass(getClass());

  opts.setAccumuloConfigs(job);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);

  job.setMapperClass(NGramMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);

  job.setNumReduceTasks(0);
  job.setSpeculativeExecution(false);

  if (!opts.getConnector().tableOperations().exists(opts.getTableName())) {
    log.info("Creating table " + opts.getTableName());
    opts.getConnector().tableOperations().create(opts.getTableName());
    SortedSet<Text> splits = new TreeSet<Text>();

    String numbers[] = "1 2 3 4 5 6 7 8 9".split("\\s");
    String lower[] = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
    String upper[] = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s");

    for (String[] array : new String[][] { numbers, lower, upper }) {
      for (String s : array) {
        splits.add(new Text(s));
      }
    }
    opts.getConnector().tableOperations().addSplits(opts.getTableName(), splits);
  }

  TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
  job.waitForCompletion(true);

  return job.isSuccessful() ? 0 : 1;
}
From source file:org.apache.accumulo.test.mrit.IntegrationTestMapReduce.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  // read a list of tests from the input, and print out the results
  if (args.length != 2) {
    System.err.println("Wrong number of args: <input> <output>");
    return 1;
  }
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, "accumulo integration test runner");
  conf = job.getConfiguration();
  // some tests take more than 10 minutes
  conf.setLong(MRJobConfig.TASK_TIMEOUT, 20 * 60 * 1000);
  // minicluster uses a lot of ram
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, 4000);
  // hadoop puts an ancient version of jline on the classpath
  conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
  // no need to run a test multiple times
  job.setSpeculativeExecution(false);
  // read one line at a time
  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  // run the test
  job.setJarByClass(IntegrationTestMapReduce.class);
  job.setMapperClass(TestMapper.class);
  // group test by result code
  job.setReducerClass(TestReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(conf);
  //setup job conf
  job.setJobName(QuasiMonteCarlo.class.getSimpleName());
  job.setJarByClass(QuasiMonteCarlo.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(BooleanWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(QmcMapper.class);

  job.setReducerClass(QmcReducer.class);
  job.setNumReduceTasks(1);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  job.setSpeculativeExecution(false);

  //setup input/output directories
  final Path inDir = new Path(tmpDir, "in");
  final Path outDir = new Path(tmpDir, "out");
  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);

  final FileSystem fs = FileSystem.get(conf);
  if (fs.exists(tmpDir)) {
    throw new IOException(
        "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Cannot create input directory " + inDir);
  }

  try {
    //generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
      final Path file = new Path(inDir, "part" + i);
      final LongWritable offset = new LongWritable(i * numPoints);
      final LongWritable size = new LongWritable(numPoints);
      final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
          LongWritable.class, CompressionType.NONE);
      try {
        writer.append(offset, size);
      } finally {
        writer.close();
      }
      System.out.println("Wrote input for Map #" + i);
    }

    //start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = Time.monotonicNow();
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
      System.out.println("Job " + job.getJobID() + " failed!");
      System.exit(1);
    }
    final double duration = (Time.monotonicNow() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    //read outputs
    Path inFile = new Path(outDir, "reduce-out");
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
    try {
      reader.next(numInside, numOutside);
    } finally {
      reader.close();
    }

    //compute estimated value
    final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
    return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal,
        RoundingMode.HALF_UP);
  } finally {
    fs.delete(tmpDir, true);
  }
}
From source file:org.apache.nutch.hostdb.UpdateHostDb.java
License:Apache License
private void updateHostDb(Path hostDb, Path crawlDb, Path topHosts, boolean checkFailed, boolean checkNew,
    boolean checkKnown, boolean force, boolean filter, boolean normalize) throws Exception {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("UpdateHostDb: starting at " + sdf.format(start));

  Job job = NutchJob.getInstance(getConf());
  Configuration conf = job.getConfiguration();
  boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
  job.setJarByClass(UpdateHostDb.class);
  job.setJobName("UpdateHostDb");

  FileSystem fs = hostDb.getFileSystem(conf);
  Path old = new Path(hostDb, "old");
  Path current = new Path(hostDb, "current");
  Path tempHostDb = new Path(hostDb, "hostdb-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // lock an existing hostdb to prevent multiple simultaneous updates
  Path lock = new Path(hostDb, LOCK_NAME);
  if (!fs.exists(current)) {
    fs.mkdirs(current);
  }
  LockUtil.createLockFile(fs, lock, false);

  MultipleInputs.addInputPath(job, current, SequenceFileInputFormat.class);

  if (topHosts != null) {
    MultipleInputs.addInputPath(job, topHosts, KeyValueTextInputFormat.class);
  }
  if (crawlDb != null) {
    // Tell the job we read from CrawlDB
    conf.setBoolean("hostdb.reading.crawldb", true);
    MultipleInputs.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME), SequenceFileInputFormat.class);
  }

  FileOutputFormat.setOutputPath(job, tempHostDb);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NutchWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(HostDatum.class);
  job.setMapperClass(UpdateHostDbMapper.class);
  job.setReducerClass(UpdateHostDbReducer.class);
  job.setSpeculativeExecution(false);

  conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  conf.setBoolean(HOSTDB_CHECK_FAILED, checkFailed);
  conf.setBoolean(HOSTDB_CHECK_NEW, checkNew);
  conf.setBoolean(HOSTDB_CHECK_KNOWN, checkKnown);
  conf.setBoolean(HOSTDB_FORCE_CHECK, force);
  conf.setBoolean(HOSTDB_URL_FILTERING, filter);
  conf.setBoolean(HOSTDB_URL_NORMALIZING, normalize);
  conf.setClassLoader(Thread.currentThread().getContextClassLoader());

  try {
    boolean success = job.waitForCompletion(true);
    if (!success) {
      String message = "UpdateHostDb job did not succeed, job status:" + job.getStatus().getState()
          + ", reason: " + job.getStatus().getFailureInfo();
      LOG.error(message);
      NutchJob.cleanupAfterFailure(tempHostDb, lock, fs);
      throw new RuntimeException(message);
    }

    FSUtils.replace(fs, old, current, true);
    FSUtils.replace(fs, current, tempHostDb, true);

    if (!preserveBackup && fs.exists(old))
      fs.delete(old, true);
  } catch (Exception e) {
    LOG.error("UpdateHostDb job failed: {}", e.getMessage());
    NutchJob.cleanupAfterFailure(tempHostDb, lock, fs);
    throw e;
  }

  LockUtil.removeLockFile(fs, lock);
  long end = System.currentTimeMillis();
  LOG.info("UpdateHostDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
From source file:org.apache.nutch.util.hostdb.HostDb.java
License:Apache License
private void hostDb(Path hostDb, Path crawlDb, Path topHosts, boolean checkFailed, boolean checkNew,
    boolean checkKnown, boolean force, boolean filter, boolean normalize) throws Exception {

  long start = System.currentTimeMillis();
  LOG.info("HostDb: starting at " + sdf.format(start));

  Configuration conf = getConf();
  conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  conf.setBoolean(HOSTDB_CHECK_FAILED, checkFailed);
  conf.setBoolean(HOSTDB_CHECK_NEW, checkNew);
  conf.setBoolean(HOSTDB_CHECK_KNOWN, checkKnown);
  conf.setBoolean(HOSTDB_FORCE_CHECK, force);
  conf.setBoolean(HOSTDB_URL_FILTERING, filter);
  conf.setBoolean(HOSTDB_URL_NORMALIZING, normalize);

  // Check whether the urlfilter-domainblacklist plugin is loaded
  if (filter && "urlfilter-domainblacklist".matches(conf.get("plugin.includes"))) {
    throw new Exception("domainblacklist-urlfilter must not be enabled");
  }

  // Check whether the urlnormalizer-host plugin is loaded
  if (normalize && "urlnormalizer-host".matches(conf.get("plugin.includes"))) {
    throw new Exception("urlnormalizer-host must not be enabled");
  }

  FileSystem fs = FileSystem.get(conf);
  Path old = new Path(hostDb, "old");
  Path current = new Path(hostDb, CURRENT_NAME);
  Path tempHostDb = new Path(hostDb, "hostdb-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // lock an existing hostdb to prevent multiple simultaneous updates
  Path lock = new Path(hostDb, LOCK_NAME);
  if (!fs.exists(current)) {
    fs.mkdirs(current);
  }
  LockUtil.createLockFile(fs, lock, false);

  Job job = new Job(conf, "HostDb " + hostDb);
  job.setJarByClass(HostDb.class);
  job.setSpeculativeExecution(false);

  MultipleInputs.addInputPath(job, current, SequenceFileInputFormat.class);

  if (topHosts != null) {
    MultipleInputs.addInputPath(job, topHosts, KeyValueTextInputFormat.class);
  }
  if (crawlDb != null) {
    // Tell the job we read from CrawlDB
    conf.setBoolean("hostdb.reading.crawldb", true);
    MultipleInputs.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME), SequenceFileInputFormat.class);
  }

  FileOutputFormat.setOutputPath(job, tempHostDb);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NutchWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(HostDatum.class);
  job.setMapperClass(HostDbMapper.class);
  job.setReducerClass(HostDbReducer.class);

  try {
    job.waitForCompletion(true);

    FSUtils.replace(fs, old, current, true);
    FSUtils.replace(fs, current, tempHostDb, true);

    boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
    if (!preserveBackup && fs.exists(old))
      fs.delete(old, true);
  } catch (Exception e) {
    if (fs.exists(tempHostDb)) {
      fs.delete(tempHostDb, true);
    }
    LockUtil.removeLockFile(fs, lock);
    throw e;
  }

  LockUtil.removeLockFile(fs, lock);
  long end = System.currentTimeMillis();
  LOG.info("HostDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean computeExpTable() throws IOException, ClassNotFoundException, InterruptedException {
  boolean success;

  logger.info("Creating expTable");

  // The split location for the interim calculations, delete upon completion
  Path splitDir = new Path("/tmp/splits-" + queryInfo.getIdentifier());
  if (fs.exists(splitDir)) {
    fs.delete(splitDir, true);
  }

  // Write the query hashes to the split files
  Map<Integer, BigInteger> queryElements = query.getQueryElements();
  List<Integer> keys = new ArrayList<>(queryElements.keySet());

  int numSplits = SystemConfiguration.getIntProperty("pir.expCreationSplits", 100);
  int elementsPerSplit = queryElements.size() / numSplits; // Integral division.
  logger.info("numSplits = " + numSplits + " elementsPerSplit = " + elementsPerSplit);

  for (int i = 0; i < numSplits; ++i) {
    // Grab the range of the thread
    int start = i * elementsPerSplit;
    int stop = start + elementsPerSplit - 1;
    if (i == (numSplits - 1)) {
      stop = queryElements.size() - 1;
    }
    HDFS.writeFileIntegers(keys.subList(start, stop), fs, new Path(splitDir, "split-" + i), false);
  }

  // Run the job to generate the expTable
  // Job jobExp = new Job(mrConfig.getConfig(), "pirExp-" + pirWL.getWatchlistNum());
  Job jobExp = Job.getInstance(conf, "pirExp-" + queryInfo.getIdentifier());

  jobExp.setSpeculativeExecution(false);
  jobExp.getConfiguration().set("mapreduce.map.speculative", "false");
  jobExp.getConfiguration().set("mapreduce.reduce.speculative", "false");

  // Set the memory and heap options
  jobExp.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "10000"));
  jobExp.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "10000"));
  jobExp.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx9000m"));
  jobExp.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx9000m"));
  jobExp.getConfiguration().set("mapreduce.reduce.shuffle.parallelcopies", "5");

  jobExp.getConfiguration().set("pirMR.queryInputDir", SystemConfiguration.getProperty("pir.queryInput"));
  jobExp.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);

  jobExp.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.setInputPaths(jobExp, splitDir);

  jobExp.setJarByClass(ExpTableMapper.class);
  jobExp.setMapperClass(ExpTableMapper.class);

  jobExp.setMapOutputKeyClass(Text.class);
  jobExp.setMapOutputValueClass(Text.class);

  // Set the reducer and output params
  int numExpLookupPartitions = SystemConfiguration.getIntProperty("pir.numExpLookupPartitions", 100);
  jobExp.setNumReduceTasks(numExpLookupPartitions);
  jobExp.setReducerClass(ExpTableReducer.class);

  // Delete the output directory if it exists
  Path outPathExp = new Path(outputDirExp);
  if (fs.exists(outPathExp)) {
    fs.delete(outPathExp, true);
  }
  jobExp.setOutputKeyClass(Text.class);
  jobExp.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(jobExp, outPathExp);
  jobExp.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  MultipleOutputs.addNamedOutput(jobExp, FileConst.PIR, TextOutputFormat.class, Text.class, Text.class);
  MultipleOutputs.addNamedOutput(jobExp, FileConst.EXP, TextOutputFormat.class, Text.class, Text.class);

  // Submit job, wait for completion
  success = jobExp.waitForCompletion(true);

  // Assemble the exp table from the output
  // element_index -> fileName
  Map<Integer, String> expFileTable = new HashMap<>();
  FileStatus[] status = fs.listStatus(outPathExp);
  for (FileStatus fstat : status) {
    if (fstat.getPath().getName().startsWith(FileConst.PIR)) {
      logger.info("fstat.getPath().getName().toString() = " + fstat.getPath().getName());
      try {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fstat.getPath())))) {
          String line;
          while ((line = br.readLine()) != null) {
            String[] rowValTokens = line.split(","); // form is element_index,reducerNumber
            String fileName = fstat.getPath().getParent() + "/" + FileConst.EXP + "-r-" + rowValTokens[1];
            logger.info("fileName = " + fileName);
            expFileTable.put(Integer.parseInt(rowValTokens[0]), fileName);
          }
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }

  // Place exp table in query object
  query.setExpFileBasedLookup(expFileTable);
  new HadoopFileSystemStore(fs).store(queryInputDir, query);

  logger.info("Completed creation of expTable");

  return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
@SuppressWarnings("unchecked")
private boolean readDataEncRows(Path outPathInit) throws Exception {
  boolean success;

  Job job = Job.getInstance(conf, "pirMR");
  job.setSpeculativeExecution(false);

  // Set the data and query schema properties
  job.getConfiguration().set("dataSchemaName", qSchema.getDataSchemaName());
  job.getConfiguration().set("data.schemas", SystemConfiguration.getProperty("data.schemas"));
  job.getConfiguration().set("query.schemas", SystemConfiguration.getProperty("query.schemas"));

  // Set the memory and heap options
  job.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
  job.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
  job.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
  job.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

  // Set necessary files for Mapper setup
  job.getConfiguration().set("pirMR.queryInputDir", SystemConfiguration.getProperty("pir.queryInput"));
  job.getConfiguration().set("pirMR.stopListFile", SystemConfiguration.getProperty("pir.stopListFile"));

  job.getConfiguration().set("mapreduce.map.speculative", "false");
  job.getConfiguration().set("mapreduce.reduce.speculative", "false");

  job.getConfiguration().set("pirWL.useLocalCache",
      SystemConfiguration.getProperty("pir.useLocalCache", "true"));
  job.getConfiguration().set("pirWL.limitHitsPerSelector",
      SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false"));
  job.getConfiguration().set("pirWL.maxHitsPerSelector",
      SystemConfiguration.getProperty("pir.maxHitsPerSelector", "100"));

  if (dataInputFormat.equals(InputFormatConst.ES)) {
    String jobName = "pirMR_es_" + esResource + "_" + esQuery + "_" + System.currentTimeMillis();
    job.setJobName(jobName);

    job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes"));
    job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port"));
    job.getConfiguration().set("es.resource", esResource);
    job.getConfiguration().set("es.query", esQuery);

    job.setInputFormatClass(EsInputFormat.class);
  } else if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT)) {
    String baseQuery = SystemConfiguration.getProperty("pir.baseQuery");
    String jobName = "pirMR_base_" + baseQuery + "_" + System.currentTimeMillis();
    job.setJobName(jobName);

    job.getConfiguration().set("baseQuery", baseQuery);
    job.getConfiguration().set("query", baseQuery);
    job.getConfiguration().set("pir.allowAdHocQuerySchemas",
        SystemConfiguration.getProperty("pir.allowAdHocQuerySchemas", "false"));

    job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);

    // Set the inputFormatClass based upon the baseInputFormat property
    String classString = SystemConfiguration.getProperty("pir.baseInputFormat");
    Class<BaseInputFormat> inputClass = (Class<BaseInputFormat>) Class.forName(classString);
    if (!Class.forName("org.apache.pirk.inputformat.hadoop.BaseInputFormat").isAssignableFrom(inputClass)) {
      throw new Exception("baseInputFormat class = " + classString + " does not extend BaseInputFormat");
    }
    job.setInputFormatClass(inputClass);

    FileInputFormat.setInputPaths(job, inputFile);
  }

  job.setJarByClass(HashSelectorsAndPartitionDataMapper.class);
  job.setMapperClass(HashSelectorsAndPartitionDataMapper.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(BytesArrayWritable.class);

  // Set the reducer and output params
  job.setNumReduceTasks(numReduceTasks);
  job.setReducerClass(RowCalcReducer.class);

  // Delete the output directory if it exists
  if (fs.exists(outPathInit)) {
    fs.delete(outPathInit, true);
  }
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, outPathInit);
  job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  MultipleOutputs.addNamedOutput(job, FileConst.PIR, TextOutputFormat.class, LongWritable.class, Text.class);

  // Submit job, wait for completion
  success = job.waitForCompletion(true);

  return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean multiplyColumns(Path outPathInit, Path outPathColumnMult)
    throws IOException, ClassNotFoundException, InterruptedException {
  boolean success;

  Job columnMultJob = Job.getInstance(conf, "pir_columnMult");
  columnMultJob.setSpeculativeExecution(false);

  String columnMultJobName = "pir_columnMult";

  // Set the same job configs as for the first iteration
  columnMultJob.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
  columnMultJob.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
  columnMultJob.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
  columnMultJob.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

  columnMultJob.getConfiguration().set("mapreduce.map.speculative", "false");
  columnMultJob.getConfiguration().set("mapreduce.reduce.speculative", "false");

  columnMultJob.getConfiguration().set("pirMR.queryInputDir",
      SystemConfiguration.getProperty("pir.queryInput"));

  columnMultJob.setJobName(columnMultJobName);
  columnMultJob.setJarByClass(ColumnMultMapper.class);
  columnMultJob.setNumReduceTasks(numReduceTasks);

  // Set the Mapper, InputFormat, and input path
  columnMultJob.setMapperClass(ColumnMultMapper.class);
  columnMultJob.setInputFormatClass(TextInputFormat.class);

  FileStatus[] status = fs.listStatus(outPathInit);
  for (FileStatus fstat : status) {
    if (fstat.getPath().getName().startsWith(FileConst.PIR)) {
      logger.info("fstat.getPath() = " + fstat.getPath().toString());
      FileInputFormat.addInputPath(columnMultJob, fstat.getPath());
    }
  }
  columnMultJob.setMapOutputKeyClass(LongWritable.class);
  columnMultJob.setMapOutputValueClass(Text.class);

  // Set the reducer and output options
  columnMultJob.setReducerClass(ColumnMultReducer.class);
  columnMultJob.setOutputKeyClass(LongWritable.class);
  columnMultJob.setOutputValueClass(Text.class);
  columnMultJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  // Delete the output file, if it exists
  if (fs.exists(outPathColumnMult)) {
    fs.delete(outPathColumnMult, true);
  }
  FileOutputFormat.setOutputPath(columnMultJob, outPathColumnMult);

  MultipleOutputs.addNamedOutput(columnMultJob, FileConst.PIR_COLS, TextOutputFormat.class,
      LongWritable.class, Text.class);

  // Submit job, wait for completion
  success = columnMultJob.waitForCompletion(true);

  return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean computeFinalResponse(Path outPathFinal)
    throws ClassNotFoundException, IOException, InterruptedException {
  boolean success;

  Job finalResponseJob = Job.getInstance(conf, "pir_finalResponse");
  finalResponseJob.setSpeculativeExecution(false);

  String finalResponseJobName = "pir_finalResponse";

  // Set the same job configs as for the first iteration
  finalResponseJob.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
  finalResponseJob.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
  finalResponseJob.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
  finalResponseJob.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

  finalResponseJob.getConfiguration().set("pirMR.queryInputDir",
      SystemConfiguration.getProperty("pir.queryInput"));
  finalResponseJob.getConfiguration().set("pirMR.outputFile", outputFile);

  finalResponseJob.getConfiguration().set("mapreduce.map.speculative", "false");
  finalResponseJob.getConfiguration().set("mapreduce.reduce.speculative", "false");

  finalResponseJob.setJobName(finalResponseJobName);
  finalResponseJob.setJarByClass(ColumnMultMapper.class);
  finalResponseJob.setNumReduceTasks(1);

  // Set the Mapper, InputFormat, and input path
  finalResponseJob.setMapperClass(ColumnMultMapper.class);
  finalResponseJob.setInputFormatClass(TextInputFormat.class);

  FileStatus[] status = fs.listStatus(new Path(outputDirColumnMult));
  for (FileStatus fstat : status) {
    if (fstat.getPath().getName().startsWith(FileConst.PIR_COLS)) {
      logger.info("fstat.getPath() = " + fstat.getPath().toString());
      FileInputFormat.addInputPath(finalResponseJob, fstat.getPath());
    }
  }
  finalResponseJob.setMapOutputKeyClass(LongWritable.class);
  finalResponseJob.setMapOutputValueClass(Text.class);

  // Set the reducer and output options
  finalResponseJob.setReducerClass(FinalResponseReducer.class);
  finalResponseJob.setOutputKeyClass(LongWritable.class);
  finalResponseJob.setOutputValueClass(Text.class);
  finalResponseJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  // Delete the output file, if it exists
  if (fs.exists(outPathFinal)) {
    fs.delete(outPathFinal, true);
  }
  FileOutputFormat.setOutputPath(finalResponseJob, outPathFinal);
  MultipleOutputs.addNamedOutput(finalResponseJob, FileConst.PIR_FINAL, TextOutputFormat.class,
      LongWritable.class, Text.class);

  // Submit job, wait for completion
  success = finalResponseJob.waitForCompletion(true);

  return success;
}
From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java
License:Apache License
@VisibleForTesting
public Job createJob(int numMapper, int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime,
    int mapSleepCount, long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime,
    int iReduceSleepCount) throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
  conf.setInt(IREDUCE_TASKS_COUNT, numIReducer);

  // Configure intermediate reduces
  conf.setInt(org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount);
  LOG.info("Running MRR with " + iReduceStagesCount + " IR stages");

  for (int i = 1; i <= iReduceStagesCount; ++i) {
    // Set reducer class for intermediate reduce
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduce.class"),
        ISleepReducer.class, Reducer.class);
    // Set reducer output key class
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.key.class"),
        IntWritable.class, Object.class);
    // Set reducer output value class
    conf.setClass(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i,
        "mapreduce.map.output.value.class"), IntWritable.class, Object.class);
    conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"),
        numIReducer);
  }

  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(MRRSleepJob.class);
  job.setNumReduceTasks(numReducer);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(MRRSleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}