List of usage examples for org.apache.hadoop.mapreduce.Job#waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
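waitForCompletion(true) submits the configured job, prints progress to the console while it runs (the boolean controls verbosity), and blocks until the job finishes, returning true only if it succeeded. Before the project-specific examples below, here is a minimal self-contained sketch of the typical pattern; the class name WaitForCompletionSketch and the identity mapper/reducer setup are illustrative placeholders, not taken from any of the listed source files.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WaitForCompletionSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "waitForCompletion sketch");
        job.setJarByClass(WaitForCompletionSketch.class);
        // Identity mapper and reducer: the job simply copies its input to the output path.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit, print progress (verbose = true), and block until the job finishes.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}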
From source file:com.datasalt.utils.mapred.joiner.TestMultiJoinerGlob.java
License:Apache License
@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducer.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));
    Job job = multiJoiner
            .addChanneledInput(0, new Path("src/test/resources/glob-folder/*"), A.class,
                    TextInputFormat.class, AMapper.class)
            .addChanneledInput(1, new Path("src/test/resources/multijoiner.test.b.txt"), B.class,
                    TextInputFormat.class, BMapper.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}
From source file:com.datasalt.utils.mapred.joiner.TestMultiJoinerMultiChannel.java
License:Apache License
@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducer.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));
    Job job = multiJoiner
            .addInput(new Path("src/test/resources/multijoiner.test.a.txt"), TextInputFormat.class,
                    ABMapper.class)
            .setChannelDatumClass(0, A.class)
            .setChannelDatumClass(1, B.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}
From source file:com.datasalt.utils.mapred.joiner.TestMultiJoinerSameClass.java
License:Apache License
@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducerSameClass.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));
    Job job = multiJoiner
            .addChanneledInput(0, new Path("src/test/resources/multijoiner.test.a.txt"), A.class,
                    TextInputFormat.class, AMapperSameClass.class)
            .addChanneledInput(1, new Path("src/test/resources/multijoiner.test.same.class.a.txt"), A.class,
                    TextInputFormat.class, AMapperSameClass.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}
From source file:com.datasalt.utils.mapred.joiner.TestMultiJoinerSecondarySort.java
License:Apache License
@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducerSecondarySort.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));
    Job job = multiJoiner.setMultiJoinPairClass(MultiJoinPairText.class)
            .addChanneledInput(0, new Path("src/test/resources/multijoiner.test.a.2.txt"), A.class,
                    TextInputFormat.class, AMapperSecondarySort.class)
            .addChanneledInput(1, new Path("src/test/resources/multijoiner.test.b.2.txt"), B.class,
                    TextInputFormat.class, BMapperSecondarySort.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}
From source file:com.declum.squzer.example.hbase.table2file.Export.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args
 *            The command line parameters.
 * @throws Exception
 *             When running the job fails.
 */
public static void main(String[] args) throws Exception {
    args = new String[] { "test", "Out" };
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.digitalpebble.behemoth.io.sequencefile.SequenceFileConverterJob.java
License:Apache License
public int run(String[] args) throws Exception {
    int result = 0;
    addInputOption();
    addOutputOption();
    if (parseArguments(args) == null) {
        return -1;
    }
    Path input = getInputPath();
    Path output = getOutputPath();
    Job job = prepareJob(input, output, SequenceFileInputFormat.class, SequenceFileConverterMapper.class,
            Text.class, BehemothDocument.class, SequenceFileOutputFormat.class);
    job.setJobName("Convert Sequence File: " + input);
    job.waitForCompletion(true);
    if (log.isInfoEnabled()) {
        log.info("Conversion: done");
    }
    return result;
}
From source file:com.digitalpebble.behemoth.mahout.BehemothDocumentProcessor.java
License:Apache License
/**
 * Convert the input documents into a token array using {@link StringTuple}. The input documents
 * have to be in the {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *            input directory of the documents in {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *            output directory where the {@link StringTuple} token array of each document is created
 * @param type
 *            The annotation type representing the tokens
 * @param feature
 *            The name of the feature holding the token value
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void tokenizeDocuments(Path input, String type, String feature, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(TOKEN_TYPE, type);
    conf.set(FEATURE_NAME, feature);
    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::BehemothTokenizer: input-folder: " + input);
    job.setJarByClass(BehemothDocumentProcessor.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(BehemothTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");
}
From source file:com.digitalpebble.behemoth.mahout.BehemothDocumentProcessor.java
License:Apache License
/**
 * Convert the input documents into a token array using {@link StringTuple}. The input documents
 * have to be in the {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *            input directory of the documents in {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *            output directory where the {@link StringTuple} token array of each document is created
 * @param analyzerClass
 *            The Lucene {@link Analyzer} for tokenizing the UTF-8 text
 */
public static void tokenizeDocuments(Path input, Class<? extends Analyzer> analyzerClass, Path output,
        Configuration baseConf) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(ANALYZER_CLASS, analyzerClass.getName());
    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::LuceneTokenizer: input-folder: " + input);
    job.setJarByClass(BehemothDocumentProcessor.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(LuceneTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");
}
From source file:com.digitalpebble.behemoth.mahout.BehemothDocumentProcessor.java
License:Apache License
public static void dumpLabels(Path input, Path output, Configuration baseConf)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::LabelDumper: input-folder: " + input);
    job.setJarByClass(BehemothDocumentProcessor.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(BehemothLabelMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");
}
From source file:com.digitalpebble.behemoth.mahout.DocumentProcessor.java
License:Apache License
/**
 * Convert the input documents into a token array using {@link StringTuple}. The input documents
 * have to be in the {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *            input directory of the documents in {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *            output directory where the {@link StringTuple} token array of each document is created
 * @param type
 *            The annotation type representing the tokens
 * @param feature
 *            The name of the feature holding the token value
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void tokenizeDocuments(Path input, String type, String feature, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(TOKEN_TYPE, type);
    conf.set(FEATURE_NAME, feature);
    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::DocumentTokenizer: input-folder: " + input);
    job.setJarByClass(DocumentProcessor.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(SequenceFileTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);
    job.waitForCompletion(true);
}