List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
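Typical call pattern, shown as a minimal sketch before the real-world examples below (MyDriver, MyMapper, MyReducer and the /tmp paths are illustrative placeholders, not classes taken from any of the source files that follow):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);    // hypothetical Mapper implementation
        job.setReducerClass(MyReducer.class);  // hypothetical Reducer implementation
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("/tmp/example/in"));
        FileOutputFormat.setOutputPath(job, new Path("/tmp/example/out"));
        // verbose = true prints map/reduce progress to the client console;
        // the return value is true only if the job completed successfully.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

waitForCompletion submits the job if it has not been submitted yet and then blocks until it finishes. Examples below that only need asynchronous submission call submit() instead and poll the Job handle themselves.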
From source file:com.iflytek.spider.parse.ParseSegment.java
License:Apache License
public void parse(Path segment) throws IOException, InterruptedException, ClassNotFoundException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Parse: starting");
        LOG.info("Parse: segment: " + segment);
    }

    Job job = AvroJob.getAvroJob(getConf());
    job.setJobName("parse " + segment);
    FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
    job.getConfiguration().set(Spider.SEGMENT_NAME_KEY, segment.getName());

    job.setInputFormatClass(AvroPairInputFormat.class);
    job.setMapperClass(ParseMapper.class);

    FileOutputFormat.setOutputPath(job, segment);
    job.setOutputFormatClass(ParseOutputFormat.class);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(UnionData.class);

    job.waitForCompletion(true);
    if (LOG.isInfoEnabled()) {
        LOG.info("Parse: done");
    }
}
From source file:com.igalia.wordcount.WordCount.java
License:Open Source License
public int run(String[] arg0) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(WordCount.class);
    job.setJobName("wordcount");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    FileInputFormat.setInputPaths(job, new Path("/tmp/wordcount/in"));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/wordcount/out"));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.ikanow.aleph2.analytics.hadoop.services.LocalBeJobLauncher.java
License:Apache License
@Override
public void launch(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    job.waitForCompletion(true);
    //super.launch(job);
}
From source file:com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java
License:Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" })
private String runHadoopJob(CustomMapReduceJobPojo job, String tempJarLocation)
        throws IOException, SAXException, ParserConfigurationException {
    StringWriter xml = new StringWriter();
    createConfigXML(xml, job.jobtitle, job.inputCollection,
            getQueryOrProcessing(job.query, QuerySpec.INPUTFIELDS), job.isCustomTable, job.getOutputDatabase(),
            job._id.toString(), job.outputCollectionTemp, job.mapper, job.reducer, job.combiner,
            getQueryOrProcessing(job.query, QuerySpec.QUERY), job.communityIds, job.outputKey, job.outputValue,
            job.arguments);

    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
    URLClassLoader child = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
            savedClassLoader);
    Thread.currentThread().setContextClassLoader(child);

    // Now load the XML into a configuration object:
    Configuration config = new Configuration();
    try {
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(new ByteArrayInputStream(xml.toString().getBytes()));
        NodeList nList = doc.getElementsByTagName("property");
        for (int temp = 0; temp < nList.getLength(); temp++) {
            Node nNode = nList.item(temp);
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                Element eElement = (Element) nNode;
                String name = getTagValue("name", eElement);
                String value = getTagValue("value", eElement);
                if ((null != name) && (null != value)) {
                    config.set(name, value);
                }
            }
        }
    } catch (Exception e) {
        throw new IOException(e.getMessage());
    }

    // Now run the JAR file
    try {
        config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)

        if (bLocalMode) {
            config.set("mapred.job.tracker", "local");
            config.set("fs.default.name", "local");
        } else {
            String trackerUrl = HadoopUtils.getXMLProperty(
                    prop_custom.getHadoopConfigPath() + "/hadoop/mapred-site.xml", "mapred.job.tracker");
            String fsUrl = HadoopUtils.getXMLProperty(
                    prop_custom.getHadoopConfigPath() + "/hadoop/core-site.xml", "fs.default.name");
            config.set("mapred.job.tracker", trackerUrl);
            config.set("fs.default.name", fsUrl);
        }

        Job hj = new Job(config);
        Class<?> classToLoad = Class.forName(job.mapper, true, child);
        hj.setJarByClass(classToLoad);
        hj.setInputFormatClass((Class<? extends InputFormat>) Class
                .forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, child));
        if ((null != job.exportToHdfs) && job.exportToHdfs) {
            hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
                    .forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", true, child));
            Path outPath = this.ensureOutputDirectory(job);
            SequenceFileOutputFormat.setOutputPath(hj, outPath);
        } else { // normal case, stays in MongoDB
            hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
                    .forName("com.mongodb.hadoop.MongoOutputFormat", true, child));
        }
        hj.setMapperClass((Class<? extends Mapper>) Class.forName(job.mapper, true, child));
        if ((null != job.reducer) && !job.reducer.equalsIgnoreCase("null")
                && !job.reducer.equalsIgnoreCase("none")) {
            hj.setReducerClass((Class<? extends Reducer>) Class.forName(job.reducer, true, child));
        } else {
            hj.setNumReduceTasks(0);
        }
        if ((null != job.combiner) && !job.combiner.equalsIgnoreCase("null")
                && !job.combiner.equalsIgnoreCase("none")) {
            hj.setCombinerClass((Class<? extends Reducer>) Class.forName(job.combiner, true, child));
        }
        hj.setOutputKeyClass(Class.forName(job.outputKey, true, child));
        hj.setOutputValueClass(Class.forName(job.outputValue, true, child));

        hj.setJobName(job.jobtitle);

        if (bLocalMode) {
            hj.waitForCompletion(false);
            return "local_done";
        } else {
            hj.submit();
            String jobId = hj.getJobID().toString();
            return jobId;
        }
    } catch (Exception e) {
        e.printStackTrace();
        Thread.currentThread().setContextClassLoader(savedClassLoader);
        return "Error: " + HarvestExceptionUtils.createExceptionMessage(e);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    String outputReducerType = "cassandra";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "text" + i;
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, INPUT_COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCountCounters.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));

    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE,
            WordCountCounters.COUNTER_COLUMN_FAMILY);
    SlicePredicate predicate = new SlicePredicate()
            .setSlice_range(new SliceRange().setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
    return 0;
}
From source file:com.impetus.code.examples.hadoop.mapred.earthquake.EarthQuakeAnalyzer.java
License:Apache License
public static void main(String[] args) throws Throwable {
    Job job = new Job();
    job.setJarByClass(EarthQuakeAnalyzer.class);
    FileInputFormat.addInputPath(job, new Path("src/main/resources/eq/input"));
    FileOutputFormat.setOutputPath(job, new Path("src/main/resources/eq/output"));

    job.setMapperClass(EarthQuakeMapper.class);
    job.setReducerClass(EarthQuakeReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Implements the core-execution. Creates the file-list for copy,
 * and launches the Hadoop-job, to do the copy.
 * @return Job handle
 * @throws Exception, on failure.
 */
public Job execute() throws Exception {
    assert inputOptions != null;
    assert getConf() != null;

    Job job = null;
    try {
        metaFolder = createMetaFolderPath();
        jobFS = metaFolder.getFileSystem(getConf());

        job = createJob();
        createInputFileListing(job);

        job.submit();
        submitted = true;
    } finally {
        if (!submitted) {
            cleanup();
        }
    }

    String jobID = getJobID(job);
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);

    LOG.info("DistCp job-id: " + jobID);
    LOG.info("DistCp job may be tracked at: " + job.getTrackingURL());
    LOG.info("To cancel, run the following command:\thadoop job -kill " + jobID);

    long jobStartTime = System.nanoTime();
    if (inputOptions.shouldBlock() && !job.waitForCompletion(true)) {
        updateJobTimeInNanos(jobStartTime);
        throw new IOException("DistCp failure: Job " + jobID + " has failed. ");
    }
    updateJobTimeInNanos(jobStartTime);
    return job;
}
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
public void testUniformSizeDistCp() throws Exception {
    try {
        clearState();
        Path sourcePath = new Path(SOURCE_PATH).makeQualified(cluster.getFileSystem());
        List<Path> sources = new ArrayList<Path>();
        sources.add(sourcePath);

        Path targetPath = new Path(TARGET_PATH).makeQualified(cluster.getFileSystem());
        DistCpOptions options = new DistCpOptions(sources, targetPath);
        options.setOutPutDirectory(counterOutputPath);
        options.setAtomicCommit(true);
        options.setBlocking(false);

        Job job = new DistCp(configuration, options).execute();
        Path workDir = CopyOutputFormat.getWorkingDirectory(job);
        Path finalDir = CopyOutputFormat.getCommitDirectory(job);

        while (!job.isComplete()) {
            if (cluster.getFileSystem().exists(workDir)) {
                break;
            }
        }
        job.waitForCompletion(true);
        Assert.assertFalse(cluster.getFileSystem().exists(workDir));
        Assert.assertTrue(cluster.getFileSystem().exists(finalDir));
        Assert.assertFalse(cluster.getFileSystem()
                .exists(new Path(job.getConfiguration().get(DistCpConstants.CONF_LABEL_META_FOLDER))));

        verifyResults();
    } catch (Exception e) {
        LOG.error("Exception encountered", e);
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
@Test
public void testDynamicDistCp() throws Exception {
    try {
        clearState();
        Path sourcePath = new Path(SOURCE_PATH).makeQualified(cluster.getFileSystem());
        List<Path> sources = new ArrayList<Path>();
        sources.add(sourcePath);

        Path targetPath = new Path(TARGET_PATH).makeQualified(cluster.getFileSystem());
        DistCpOptions options = new DistCpOptions(sources, targetPath);
        options.setCopyStrategy("dynamic");
        options.setOutPutDirectory(counterOutputPath);
        options.setAtomicCommit(true);
        options.setAtomicWorkPath(new Path("/work"));
        options.setBlocking(false);

        Job job = new DistCp(configuration, options).execute();
        Path workDir = CopyOutputFormat.getWorkingDirectory(job);
        Path finalDir = CopyOutputFormat.getCommitDirectory(job);

        while (!job.isComplete()) {
            if (cluster.getFileSystem().exists(workDir)) {
                break;
            }
        }
        job.waitForCompletion(true);
        Assert.assertFalse(cluster.getFileSystem().exists(workDir));
        Assert.assertTrue(cluster.getFileSystem().exists(finalDir));

        verifyResults();
    } catch (Exception e) {
        LOG.error("Exception encountered", e);
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}