List of usage examples for org.apache.hadoop.mapreduce.Job setOutputValueClass
public void setOutputValueClass(Class<?> theClass) throws IllegalStateException
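setOutputValueClass declares the value type the job's reducer emits (and, unless setMapOutputValueClass is called separately, the mapper's output value type as well). It must be called while the job is still being defined; calling it after submission throws IllegalStateException. Before the project examples below, here is a minimal sketch of a driver that uses it; the class MinimalDriver and the MyMapper/MyReducer classes are hypothetical placeholders, not taken from any of the source files listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal setOutputValueClass example");
        job.setJarByClass(MinimalDriver.class);

        job.setMapperClass(MyMapper.class);   // hypothetical Mapper<LongWritable, Text, Text, IntWritable>
        job.setReducerClass(MyReducer.class); // hypothetical Reducer<Text, IntWritable, Text, IntWritable>

        // Declare the final (reducer) output types. These must match the reducer's
        // type parameters and must be set before the job is submitted.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

In the real examples below, the same call appears with whatever Writable type the job's reducer produces (Text, IntWritable, LongWritable, or a custom ArrayWritable subclass).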
From source file:com.metamx.druid.indexer.IndexGeneratorJob.java
License:Open Source License
public boolean run() {
    try {
        Job job = new Job(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);

        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.michaeljones.hellohadoopworldmaven.HelloMapReduce.java
public static Job RunJobAsync(Path inputPath, Path outputPath, Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(HelloMapReduce.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    return job;
}
From source file:com.michaeljones.hellohadoopworldmaven.HelloMapReduce.java
public static Job RunJobAnalysisAsync(Path inputPath, Path outputPath, Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(HelloMapReduce.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombinerAnalyser.class);
    job.setReducerClass(IntSumReducerAnalyser.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    return job;
}
From source file:com.ml.hadoop.nlp.DocumentProcessor.java
License:Apache License
/**
 * Convert the input documents into a token array using {@link StringTuple}. The input documents have to be
 * in {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *          input directory of the documents in {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *          output directory where the {@link StringTuple} token array of each document has to be created
 * @param analyzerClass
 *          The Lucene {@link Analyzer} for tokenizing the UTF-8 text
 */
public static void tokenizeDocuments(Path input, Class<? extends Analyzer> analyzerClass, Path output,
        Configuration baseConf) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(ANALYZER_CLASS, analyzerClass.getName());

    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::DocumentTokenizer: input-folder: " + input);
    job.setJarByClass(DocumentProcessor.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(SequenceFileTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMapReduce.java
public static void main(String[] args) throws Exception {
    int iteration = 0, num_of_iteration = 30;
    int feature_size = 2;
    FileSystem fs;
    int number_of_clusters = 2;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "K_meansClusteringMapReduce");
        job.setJarByClass(K_meansClusteringMapReduce.class);
        conf = job.getConfiguration(); // This line is mandatory.

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatArrayWritable.class);

        job.setMapperClass(K_meansClusteringMap.class);
        job.setReducerClass(K_meansClusteringReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // set number of reducers to one.

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        number_of_clusters = Integer.parseInt(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);

        conf.setInt("number_of_clusters", number_of_clusters);
        conf.setInt("feature_size", feature_size);
        conf.setInt("current_iteration_num", iteration);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMapReduce.java
public static void main(String[] args) throws Exception {
    String[] theta;
    int iteration = 0, num_of_iteration = 1;
    int feature_size = 0, input_data_size = 0;
    FileSystem fs;
    Float alpha = 0.1f;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "LinearRegressionMapReduce");
        job.setJarByClass(MultipleLinearRegressionMapReduce.class);

        // the following two lines are needed for propagating "theta"
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setMapperClass(MultipleLinearRegressionMap.class);
        job.setReducerClass(MultipleLinearRegressionReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // set mapred.reduce.tasks = 1 (only one reducer)

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        alpha = Float.parseFloat(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        input_data_size = Integer.parseInt(args[5]);

        conf.setFloat("alpha", alpha);
        conf.setInt("feature_size", feature_size);
        conf.setInt("input_data_size", input_data_size);
        conf.setInt("iteration", iteration);

        theta = new String[feature_size];
        if (iteration == 0) { // first iteration
            for (int i = 0; i < theta.length; i++)
                theta[i] = "0.0";
            conf.setStrings("theta", theta);
        } else {
            try {
                String uri = "/user/hduser/theta.txt";
                fs = FileSystem.get(conf);
                //FSDataInputStream in = fs.open(new Path(uri));
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
                theta = br.readLine().split(",");
            } catch (Exception e) {
            }
            conf.setStrings("theta", theta);
        }

        for (int i = 0; i < theta.length; i++)
            System.out.println("In MapReduce main function: theta[ " + i + " ] " + theta[i]);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java
/**
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    int number_of_classes = 1;
    int number_of_features = 1;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);
    conf = job.getConfiguration(); // This line is mandatory.

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);

    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.mongodb.hadoop.examples.snmp.SnmpStatisticMongoTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");

    final String outputCollectionName = outputUri.getCollection();

    final Job job = new Job(conf, "snmp analysis " + outputCollectionName);
    job.setJarByClass(SnmpStatisticMongoTool.class);
    job.setMapperClass(MapHostUploadEachAPEachDay.class);
    job.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
From source file:com.mongodb.hadoop.examples.snmp.SnmpStatisticMongoTool.java
License:Apache License
public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    final Configuration conf = new Configuration();
    String output_table = null;

    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.snmp");
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);

    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmp_with_shards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." + output_table);

    final Job job = new Job(conf, "snmp analysis " + output_table);
    job.setJarByClass(SnmpStatisticMongoTool.class);
    job.setMapperClass(MapHostUploadEachAPEachDay.class);
    job.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    boolean result = job.waitForCompletion(true);
    System.exit(result ? 0 : 1);
}
From source file:com.mongodb.hadoop.examples.snmp.SnmpStatisticWithCombiner.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");

    final String outputCollectionName = outputUri.getCollection();

    final Job job = new Job(conf, "snmp analysis " + outputCollectionName);
    job.setJarByClass(SnmpStatisticWithCombiner.class);
    job.setMapperClass(MapHostUploadOnEachAPPerDay.class);
    job.setCombinerClass(CombineHostUploadOnEachAPPerDay.class);
    job.setReducerClass(ReduceHostUploadOnEachAPPerDay.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
    return 0;
}