List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
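Before the project-specific examples below, here is a minimal sketch of where setMapOutputValueClass fits into typical job setup. It assumes the stock Hadoop library classes TokenCounterMapper and IntSumReducer and placeholder command-line paths; it is an illustrative sketch, not code from any of the listed projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class SetMapOutputValueClassSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "map-output-value-class-sketch");
        job.setJarByClass(SetMapOutputValueClassSketch.class);

        // TokenCounterMapper emits (Text, IntWritable) pairs.
        job.setMapperClass(TokenCounterMapper.class);
        job.setReducerClass(IntSumReducer.class);

        // Declare the intermediate (map output) types. These calls throw
        // IllegalStateException if made after the job has been submitted,
        // so all type declarations belong in the setup phase.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path(args[0]));     // placeholder input path
        TextOutputFormat.setOutputPath(job, new Path(args[1]));   // placeholder output path

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Setting the map output key/value classes is only required when the intermediate types differ from the final output types; if they are not set, they default to the classes given to setOutputKeyClass and setOutputValueClass, which is why several of the examples below set both pairs even when the types are identical.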
From source file:com.jbw.recommendsystem.cooc.CoocMRD.java
@Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Path in = new Path(conf.get("input")); Path out = new Path(conf.get("output")); Job surJob = Job.getInstance(conf); surJob.setJarByClass(CoocMRD.class); surJob.setJobName("Coor"); surJob.setMapperClass(CoocMapper.class); surJob.setReducerClass(IntSumReducer.class); surJob.setMapOutputKeyClass(Text.class); surJob.setMapOutputValueClass(IntWritable.class); surJob.setOutputKeyClass(Text.class); surJob.setOutputValueClass(IntWritable.class); surJob.setInputFormatClass(TextInputFormat.class); surJob.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(surJob, in); TextOutputFormat.setOutputPath(surJob, out); return surJob.waitForCompletion(true) ? 0 : 1; }
From source file:com.jbw.recommendsystem.guiyihua.GYHMRD.java
@Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Path in = new Path(conf.get("in")); Path out = new Path(conf.get("out")); Job job = Job.getInstance(conf); job.setJarByClass(GYHMRD.class); job.setJobName("fdsjh"); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setPartitionerClass(XXPartition.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(2);/*w w w . j a v a 2 s. c o m*/ TextInputFormat.addInputPath(job, in); TextOutputFormat.setOutputPath(job, out); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.jbw.recommendsystem.iteamlist.ItemMRD.java
@Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Path in = new Path(conf.get("input")); Path out = new Path(conf.get("output")); Job surJob = Job.getInstance(conf); surJob.setJarByClass(ItemMRD.class); surJob.setJobName("item"); surJob.setMapperClass(ItemMapper.class); surJob.setReducerClass(ItemReducer.class); surJob.setMapOutputKeyClass(Text.class); surJob.setMapOutputValueClass(Text.class); surJob.setOutputKeyClass(Text.class); surJob.setOutputValueClass(Text.class); surJob.setInputFormatClass(TextInputFormat.class); surJob.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(surJob, in); TextOutputFormat.setOutputPath(surJob, out); return surJob.waitForCompletion(true) ? 0 : 1; }
From source file:com.jbw.recommendsystem.martrixlist.MartrixListMRD.java
@Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Path in = new Path(conf.get("input")); Path out = new Path(conf.get("output")); Job surJob = Job.getInstance(conf); surJob.setJarByClass(MartrixListMRD.class); surJob.setJobName("user"); surJob.setMapperClass(MListMapper.class); surJob.setReducerClass(MListReducer.class); surJob.setMapOutputKeyClass(Text.class); surJob.setMapOutputValueClass(Text.class); surJob.setOutputKeyClass(Text.class); surJob.setOutputValueClass(Text.class); surJob.setInputFormatClass(TextInputFormat.class); surJob.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(surJob, in); TextOutputFormat.setOutputPath(surJob, out); return surJob.waitForCompletion(true) ? 0 : 1; }
From source file:com.juniarto.secondsorter.SsJob.java
public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //job.submit();
    long time1 = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long time2 = System.nanoTime();
    long timeSpent = time2 - time1;
    LOG.info("TIME: " + timeSpent);

    return 0;
}
From source file:com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonDriver.java
License:Open Source License
public static void main(String[] args) throws Exception {
    logger.info("Logger - Converting Kissmetrics Json to Valid Json files");
    System.out.println("Converting Kissmetrics Json to Valid Json files");
    System.out.println("defaultCharacterEncoding by property: " + System.getProperty("file.encoding"));
    System.out.println("defaultCharacterEncoding by code: " + getDefaultCharEncoding());
    System.out.println("defaultCharacterEncoding by charSet: " + Charset.defaultCharset());

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToEnrichedJsonDriver.class);
    job.setJobName("Kissmetrics Json to valid and enriched Json files");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Add number of reducers
    int numberOfReducers = 2;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 2;
        }
    }

    job.setMapperClass(com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonReducer.class);
    job.setNumReduceTasks(numberOfReducers);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.kasabi.labs.freebase.mr.Freebase2RDFDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("run({})", Utils.toString(args));
    }

    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();

    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName("Freebase2RDFDriver");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(Freebase2RDFMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(Freebase2RDFReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.kk.hadoop.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(2);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.kse.bigdata.main.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    /**********************************************************************************
     ** Merge the source files into one.                                             **
     ** Should change the directories of each file before executing the program.     **
     **********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if (!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/
    Configuration conf = new Configuration();

    // Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    // Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;
    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {
        /*
         * Mandatory commands
         */
        // Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        // Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        // Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional commands
         */
        // Extract the number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        // Whether the job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        // Extract the sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        // Whether the job uses mean or median. [Default: mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";

    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    // Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {
        /* ******************** Hadoop Launch *********************** */
        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Should think of another way to implement the combiner class;
        // the current implementation is not helpful to the job.
        // job.setCombinerClass(Combiner.class);

        // Use a single reduce task so the 100 nearest neighbors stay in one sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * When the job finishes, read its result and store it in the results list.
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * When all jobs finish, write the collected results to the output/result.csv file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        // CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");
            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.lightboxtechnologies.spectrum.PythonJob.java
License:Apache License
public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred,
        String format, Configuration conf) throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf);
    job.setJarByClass(PythonJob.class);
    job.setMapperClass(PythonMapper.class);

    PyEngine py = new PyEngine();
    configPyTask(job, py, "map", pymap);
    job.setMapOutputKeyClass(py.getKeyClass());
    job.setMapOutputValueClass(py.getValueClass());

    int numReduces = 1;
    job.setOutputKeyClass(py.getKeyClass());
    job.setOutputValueClass(py.getValueClass());
    if (pyred.equals("none")) {
        numReduces = 0;
    } else if (pyred.equals("identity")) {
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    } else if (pyred.equals("LongSumReducer")) {
        job.setReducerClass(LongSumReducer.class);
        job.setCombinerClass(LongSumReducer.class);
    } else {
        job.setReducerClass(PythonReducer.class);
        configPyTask(job, py, "reduce", pyred);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    }
    job.setNumReduceTasks(numReduces);

    // it is possible to run over a flat json file...
    //    String input = otherArgs[0];
    //    if (input.endsWith(".json") == true) {
    //        job.setInputFormatClass(FsEntryJsonInputFormat.class);
    //        FsEntryJsonInputFormat.addInputPath(job, new Path(input));
    //    }
    //    else {
    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    if (format != null && format.equals("SequenceFileOutputFormat")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outpath));

    return job.waitForCompletion(true) ? 0 : 1;
}