Usage examples for org.apache.hadoop.mapred.FileInputFormat.setInputPaths
public static void setInputPaths(JobConf conf, Path... inputPaths)
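Before the examples taken from real projects below, here is a minimal, hypothetical sketch of the call in isolation (the class name and paths are placeholders, not taken from any of the source files listed). setInputPaths replaces any input paths previously configured on the JobConf with the paths given.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SetInputPathsSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf(SetInputPathsSketch.class);
        conf.setInputFormat(TextInputFormat.class);
        // Varargs overload: overwrites any input paths set earlier on this JobConf.
        FileInputFormat.setInputPaths(conf, new Path("/data/in1"), new Path("/data/in2"));
        // A String overload taking a comma-separated list also exists, which is
        // what several of the examples below use:
        // FileInputFormat.setInputPaths(conf, "/data/in1,/data/in2");
    }
}

The examples that follow show the same call inside complete job drivers.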
From source file:com.ostor.dedup.hadoop.DedupStorHadoopCreateObjectsMapReduce.java
License:Open Source License
public static void main(String[] args) throws Exception {
    System.out.println("NOTE: Setting up logs from conf file - " + DedupStor.DEFAULT_LOG4J_FILE);
    PropertyConfigurator.configure(DedupStor.DEFAULT_LOG4J_FILE);

    JobConf conf = new JobConf(DedupStorHadoopCreateObjectsMapReduce.class);
    conf.setJobName("dedup-create-objects");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(DedupObjectSegmentWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DedupStorHadoopCreateObjectsMapper.class);
    conf.setReducerClass(DedupStorHadoopCreateObjectsReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    Path inputPath = new Path(args[0], DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_TMP_PATH);
    Path segmentStorPath = new Path(args[0], DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_SEGMENTS_LOC_SUFFIX);
    Path objectStorPath = new Path(args[0], DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_LOC_SUFFIX);
    Path objectMapPath = new Path(args[0], DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_TMP_PATH);

    conf.set(DedupStorHadoopUtils.HADOOP_CONF_SEGMENTS_STOR_PATH_KEY, segmentStorPath.toString());
    conf.set(DedupStorHadoopUtils.HADOOP_CONF_OBJECTS_STOR_PATH_KEY, objectStorPath.toString());
    conf.set(DedupStorHadoopUtils.HADOOP_CONF_OBJECTS_TMP_PATH_KEY, objectMapPath.toString());

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, objectStorPath);

    JobClient.runJob(conf);
}
From source file:com.ostor.dedup.hadoop.DedupStorHadoopCreateSegmentsMapReduce.java
License:Open Source License
public static void main(String[] args) throws Exception {
    System.out.println("NOTE: Setting up logs from conf file - " + DedupStor.DEFAULT_LOG4J_FILE);
    PropertyConfigurator.configure(DedupStor.DEFAULT_LOG4J_FILE);

    JobConf conf = new JobConf(DedupStorHadoopCreateSegmentsMapReduce.class);
    conf.setJobName("dedup-create-segments");

    conf.setMapOutputKeyClass(DedupHashWritable.class);
    conf.setMapOutputValueClass(DedupObjectSegmentCompleteWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DedupObjectSegmentWritable.class);

    conf.setMapperClass(DedupStorHadoopCreateSegmentsMapper.class);
    conf.setReducerClass(DedupStorHadoopCreateSegmentsReducer.class);

    conf.setInputFormat(DedupObjectInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    logger.info("Set input dir - " + args[0]);
    logger.info("Set output dir - " + args[1]);

    Path inputPath = new Path(args[0]);
    Path segmentStorPath = new Path(args[1], DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_SEGMENTS_LOC_SUFFIX);
    Path objectMapPath = new Path(args[1], DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_TMP_PATH);

    conf.set(DedupStorHadoopUtils.HADOOP_CONF_SEGMENTS_STOR_PATH_KEY, segmentStorPath.toString());
    conf.set(DedupStorHadoopUtils.HADOOP_CONF_OBJECTS_TMP_PATH_KEY, objectMapPath.toString());

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, objectMapPath);

    JobClient.runJob(conf);
}
From source file:com.pegasus.ResultInfo.java
License:Apache License
protected JobConf configStage2() throws Exception {
    final JobConf conf = new JobConf(getConf(), ConCmpt.class);
    conf.set("cur_iter", "" + cur_iter);
    conf.set("make_symmetric", "" + make_symmetric);
    conf.setJobName("ConCmpt_Stage2");

    conf.setMapperClass(MapStage2.class);
    conf.setReducerClass(RedStage2.class);
    conf.setCombinerClass(CombinerStage2.class);

    FileInputFormat.setInputPaths(conf, tempbm_path);
    FileOutputFormat.setOutputPath(conf, nextbm_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
From source file:com.pegasus.ResultInfo.java
License:Apache License
protected JobConf configStage3() throws Exception {
    final JobConf conf = new JobConf(getConf(), ConCmpt.class);
    conf.setJobName("ConCmpt_Stage3");

    conf.setMapperClass(MapStage3.class);
    conf.setReducerClass(RedStage3.class);
    conf.setCombinerClass(RedStage3.class);

    FileInputFormat.setInputPaths(conf, nextbm_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(1); // This is necessary.

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
From source file:com.qfa.WordCount.java
License:Apache License
/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
From source file:com.rapleaf.hank.hadoop.HadoopDomainBuilder.java
License:Apache License
public static final JobConf createJobConfiguration(String inputPath, Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass, int versionNumber, DomainBuilderProperties properties) {
    JobConf conf = new JobConf();
    // Input specification
    conf.setInputFormat(inputFormatClass);
    FileInputFormat.setInputPaths(conf, inputPath);
    // Mapper class and key/value classes
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class);
    conf.setMapOutputValueClass(ValueWritable.class);
    // Reducer class and key/value classes
    conf.setReducerClass(DomainBuilderReducer.class);
    conf.setOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setOutputValueClass(ValueWritable.class);
    // Output format
    conf.setOutputFormat(properties.getOutputFormatClass());
    // Output path (set to tmp output path)
    FileOutputFormat.setOutputPath(conf, new Path(properties.getTmpOutputPath(versionNumber)));
    // Partitioner
    conf.setPartitionerClass(DomainBuilderPartitioner.class);
    // Output Committer
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);
    // Hank specific configuration
    properties.setJobConfProperties(conf, versionNumber);
    return conf;
}
From source file:com.scaleoutsoftware.soss.hserver.Test_WordCountMapred.java
License:Apache License
/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Test_WordCountMapred.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(0);

    String in = args.length == 2 ? args[0] : "random.txt";
    String out = args.length == 2 ? args[1]
            : "c:\\development\\mapred_output\\dir" + System.currentTimeMillis();

    FileInputFormat.setInputPaths(conf, new Path(in));
    FileOutputFormat.setOutputPath(conf, new Path(out));

    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addJar("/path/to/your/jar").load();

    // HERE IS STANDARD HADOOP INVOCATION
    //JobClient.runJob(conf);

    // HSERVER INVOCATION
    HServerJobClient.runJob(conf, false, grid);
    return 0;
}
From source file:com.test.hadoop.JhhSort.java
License:Apache License
/**
 * The main driver for sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
@SuppressWarnings({ "rawtypes" })
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), JhhSort.class);
    jobConf.setJobName("sorter");
    jobConf.set("mapred.job.tracker", "192.168.12.200:9001");
    jobConf.set("fs.default.name", "hdfs://192.168.12.200:9000");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.5);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = TextInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = TextOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = LongWritable.class;
    Class<? extends Writable> outputValueClass = LongWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);
    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);
    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.test.hadoop.JhhSum.java
License:Apache License
@SuppressWarnings({ "rawtypes" }) public int run(String[] args) throws Exception { JobConf jobConf = new JobConf(getConf(), JhhSum.class); jobConf.setJobName("sum"); jobConf.set("mapred.job.tracker", "192.168.12.200:9001"); jobConf.set("fs.default.name", "hdfs://192.168.12.200:9000"); jobConf.setMapperClass(IdentityMapper.class); jobConf.setReducerClass(LongSumReducer.class); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.5); String sort_reduces = jobConf.get("test.sort.reduces_per_host"); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); }//from w ww . j a va 2 s .c om Class<? extends InputFormat> inputFormatClass = JhhInputFormat.class; Class<? extends OutputFormat> outputFormatClass = TextOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = Text.class; Class<? extends Writable> outputValueClass = LongWritable.class; List<String> otherArgs = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { otherArgs.add(args[i]); } // Set user-supplied (possibly default) job configs jobConf.setNumReduceTasks(num_reduces); jobConf.setInputFormat(inputFormatClass); jobConf.setOutputFormat(outputFormatClass); jobConf.setOutputKeyClass(outputKeyClass); jobConf.setOutputValueClass(outputValueClass); if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); } FileInputFormat.setInputPaths(jobConf, otherArgs.get(0)); FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1))); System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf) + " with " + num_reduces + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); jobResult = JobClient.runJob(jobConf); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; }
From source file:com.test.PiEstimatorKrb.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimatorKrb.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            sLogger.info("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        sLogger.info("Starting Job");
        final long startTime = System.currentTimeMillis();

        if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
            jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
        }

        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        sLogger.info("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}