List of usage examples for org.apache.hadoop.mapred.FileInputFormat.setInputPaths
public static void setInputPaths(JobConf conf, Path... inputPaths)
From source file:adept.mapreduce.MapReduce.java
License:Apache License
public JobConf getConfiguration(String inputPath, String outputPath, String mapClass) throws Exception { //Configuration conf = getConf(); Class thisclass = getClass(); JobConf job = new JobConf(new Configuration(), thisclass); try {//w ww.jav a2 s . c o m Path in = new Path(inputPath); Path out = new Path(outputPath); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("Algorithm Map-Reduce"); job.setMapperClass((Class<? extends Mapper>) Class.forName(mapClass)); } catch (Exception e) { throw new RuntimeException("Exception occurred: " + e.getMessage()); } job.setReducerClass(AdeptReducer.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.set("key.value.separator.in.input.line", "\t"); return job; }
From source file:adept.mapreduce.MapReduceExample.java
License:Apache License
public int run(String[] args) throws Exception { Configuration conf = getConf(); //Configuration conf = new Configuration(); JobConf job = new JobConf(conf, MapReduceExample.class); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("MapReduecExample"); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.set("key.value.separator.in.input.line", ","); JobClient.runJob(job);/* w w w . j a va 2 s .c om*/ return 0; }
From source file:alluxio.client.hadoop.DFSIOIntegrationTest.java
License:Apache License
/**
 * Runs a single map-reduce pass with the given mapper and one {@code AccumulatingReducer}.
 *
 * <p>Input is read as sequence files from the control directory derived from {@code mConfig};
 * output key and value are both {@code Text}.
 *
 * @param mapperClass mapper implementation to run
 * @param outputDir   directory the job output is written to
 * @throws IOException propagated from job setup or execution
 */
private void runIOTest(Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, Path outputDir)
        throws IOException {
    JobConf ioJob = new JobConf(mConfig, DFSIOIntegrationTest.class);

    // Input side.
    FileInputFormat.setInputPaths(ioJob, getControlDir(mConfig));
    ioJob.setInputFormat(SequenceFileInputFormat.class);

    // Processing: caller-supplied mapper, a single accumulating reducer.
    ioJob.setMapperClass(mapperClass);
    ioJob.setReducerClass(AccumulatingReducer.class);
    ioJob.setNumReduceTasks(1);

    // Output side.
    FileOutputFormat.setOutputPath(ioJob, outputDir);
    ioJob.setOutputKeyClass(Text.class);
    ioJob.setOutputValueClass(Text.class);

    JobClient.runJob(ioJob);
}
From source file:arrestsbyyear.ArrestsByYear.java
public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, ArrestsByYear.class); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("ArrestsByYear"); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // job.set("key.value.separator.in.input.line", ""); JobClient.runJob(job);//from w w w . j ava 2 s . co m return 0; }
From source file:average.AverageDriver.java
public static void main(String[] args) { JobClient client = new JobClient(); // Configurations for Job set in this variable JobConf conf = new JobConf(average.AverageDriver.class); // Name of the Job conf.setJobName("BookCrossing1.0"); // Data type of Output Key and Value conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); // Setting the Mapper and Reducer Class conf.setMapperClass(average.AverageMapper.class); conf.setReducerClass(average.AverageReducer.class); // Formats of the Data Type of Input and output conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // Specify input and output DIRECTORIES (not files) FileInputFormat.setInputPaths(conf, new Path(args[1])); FileOutputFormat.setOutputPath(conf, new Path(args[2])); client.setConf(conf);/*from w w w . j a va 2s .c o m*/ try { // Running the job with Configurations set in the conf. JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } }
From source file:averageprocessingtimesbytype.AverageProcessingTimesByType.java
public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, AverageProcessingTimesByType.class); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("AverageProcessingTimesByType"); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // job.set("key.value.separator.in.input.line", ""); JobClient.runJob(job);/* ww w . j a v a2s . c o m*/ return 0; }
From source file:boa.datagen.SeqSort.java
License:Apache License
/**
 * Driver for the sort job: configures it, runs it, and prints timing information.
 *
 * <p>The number of reduces defaults to 90% of the cluster's maximum reduce tasks. When the
 * {@code test.sort.reduces_per_host} property is set, it is that value multiplied by the
 * number of task trackers instead. Input is read as sequence files; output is written as a
 * block-compressed Snappy sequence file with {@code Text} keys and {@code BytesWritable} values.
 *
 * @param args not read by this method; the paths come from {@code inPath} and {@code outPath}
 * @return 0 once the job call has returned normally
 * @throws Exception propagated from cluster communication or job execution
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println(inPath);

    JobConf sortJob = new JobConf(getConf(), SeqSort.class);
    sortJob.setJobName("sorter");
    sortJob.setMapperClass(IdentityMapper.class);
    sortJob.setReducerClass(IdentityReducer.class);

    // Size the reduce phase from the live cluster unless a per-host override is configured.
    ClusterStatus clusterStatus = new JobClient(sortJob).getClusterStatus();
    String reducesPerHost = sortJob.get("test.sort.reduces_per_host");
    int reduceCount = (reducesPerHost == null)
            ? (int) (clusterStatus.getMaxReduceTasks() * 0.9)
            : clusterStatus.getTaskTrackers() * Integer.parseInt(reducesPerHost);
    sortJob.setNumReduceTasks(reduceCount);

    // Sequence files in and out.
    sortJob.setInputFormat(SequenceFileInputFormat.class);
    sortJob.setOutputFormat(SequenceFileOutputFormat.class);
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(BytesWritable.class);

    // Block-level Snappy compression of the output.
    SequenceFileOutputFormat.setCompressOutput(sortJob, true);
    SequenceFileOutputFormat.setOutputCompressorClass(sortJob, SnappyCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(sortJob, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(sortJob, inPath);
    FileOutputFormat.setOutputPath(sortJob, new Path(outPath));

    System.out.println("Running on " + clusterStatus.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(sortJob)[0] + " into "
            + FileOutputFormat.getOutputPath(sortJob) + " with " + reduceCount + " reduces.");

    Date startedAt = new Date();
    System.out.println("Job started: " + startedAt);
    jobResult = JobClient.runJob(sortJob);
    Date endedAt = new Date();
    System.out.println("Job ended: " + endedAt);

    long elapsedSeconds = (endedAt.getTime() - startedAt.getTime()) / 1000;
    System.out.println("The job took " + elapsedSeconds + " seconds.");
    return 0;
}
From source file:br.eti.kinoshita.hadoop.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class); conf.setJarByClass(WordCount.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); //FileInputFormat.setInputPaths(conf, new Path("hdfs://chuva:9000/test/leiseca.")); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w . jav a 2 s .co m*/ }
From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerMain.java
License:Open Source License
/**
 * Entry point for the "indexer" job.
 *
 * <p>{@code IndexerReduce} serves as both combiner and reducer; output key and value are
 * both {@code Text}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception propagated from job configuration or execution
 */
public static void main(String[] args) throws Exception {
    JobConf indexerJob = new JobConf(IndexerMain.class);
    indexerJob.setJobName("indexer");

    // Locations supplied on the command line.
    FileInputFormat.setInputPaths(indexerJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(indexerJob, new Path(args[1]));

    // Text formats on both sides, Text/Text output pairs.
    indexerJob.setInputFormat(TextInputFormat.class);
    indexerJob.setOutputFormat(TextOutputFormat.class);
    indexerJob.setOutputKeyClass(Text.class);
    indexerJob.setOutputValueClass(Text.class);

    // Processing classes.
    indexerJob.setMapperClass(IndexerMap.class);
    indexerJob.setCombinerClass(IndexerReduce.class);
    indexerJob.setReducerClass(IndexerReduce.class);

    JobClient.runJob(indexerJob);
}
From source file:BU.MET.CS755.SpeciesIterDriver2.java
static boolean MRGraphBuilder(String args[], int iterCnt) { Job theJob = null;/* w w w.ja v a2s .c o m*/ conf = new JobConf(SpeciesIterDriver2.class); conf.setJobName("Species Graph Builder"); conf.setNumReduceTasks(5); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(SpeciesGraphBuilderMapper.class); conf.setReducerClass(SpeciesGraphBuilderReducer.class); // Reading in XML. conf.setInputFormat(StreamInputFormat.class); conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader"); // Look for the <page> record in the XML. conf.set("stream.recordreader.begin", "<page>"); conf.set("stream.recordreader.end", "</page>"); inputpath = args[0]; outputpath = args[1] + iterCnt; FileInputFormat.setInputPaths(conf, new Path(inputpath)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); try { theJob = new Job(conf, "SpeciesIter"); theJob.submit(); } catch (Exception e) { e.printStackTrace(); } try { if (theJob != null) { theJob.waitForCompletion(true); } } catch (Exception e) { e.printStackTrace(); } return true; }