List of usage examples for org.apache.hadoop.mapreduce Job getInstance
@Deprecated public static Job getInstance(Cluster ignored) throws IOException
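For orientation, here is a minimal sketch of the usual driver pattern. It is an illustration, not taken from the examples below: it uses the non-deprecated Job.getInstance(Configuration, String) overload instead of the deprecated getInstance(Cluster) signature above, Hadoop's identity Mapper/Reducer, and a hypothetical MinimalGetInstanceDriver class name.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalGetInstanceDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(Configuration) / getInstance(Configuration, String) are the
        // non-deprecated replacements for getInstance(Cluster) and the new Job(conf) constructor.
        Job job = Job.getInstance(conf, "minimal getInstance example");
        job.setJarByClass(MinimalGetInstanceDriver.class);
        // Identity Mapper/Reducer: with the default TextInputFormat the map keys are
        // LongWritable byte offsets and the values are Text lines, passed through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // must not already exist
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples that follow perform essentially the same setup, most of them inside a Tool.run(String[]) method so that GenericOptionsParser/ToolRunner can handle generic Hadoop options.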
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.statistics.WARCRecordCounter.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    System.out.println("Other args: " + Arrays.toString(otherArgs));

    Job job = Job.getInstance(conf);
    job.setJarByClass(WARCRecordCounter.class);
    job.setJobName(WARCRecordCounter.class.getName());

    // mapper
    job.setMapperClass(ResponseMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // combiner + reducer
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.PagesByURLExtractor.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    for (Map.Entry<String, String> next : job.getConfiguration()) {
        System.out.println(next.getKey() + ": " + next.getValue());
    }

    job.setJarByClass(PagesByURLExtractor.class);
    job.setJobName(PagesByURLExtractor.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // input
    job.setInputFormatClass(WARCInputFormat.class);

    // output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    // load IDs to be searched for
    job.getConfiguration().set(MAPREDUCE_MAPPER_URLS, loadURLs(args[2]));

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.TextToSentencesSplitter.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf);
    job.setJarByClass(TextToSentencesSplitter.class);
    job.setJobName(TextToSentencesSplitter.class.getName());

    // mapper
    job.setMapperClass(TextToSentencesSplitter.MapperClass.class);
    job.setInputFormatClass(WARCInputFormat.class);

    // reducer
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.URIExtractor.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    // set from the command line
    job.setJarByClass(URIExtractor.class);
    job.setJobName(URIExtractor.class.getName());

    // mapper
    job.setMapperClass(URIExtractorMapper.class);
    job.setReducerClass(URIExtractorReducer.class);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    // is necessary, so that Hadoop does not mix the map input format up.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.experiments.dip.hadoop.ClueWebTRECIdFileExtractor.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    for (Map.Entry<String, String> next : job.getConfiguration()) {
        System.out.println(next.getKey() + ": " + next.getValue());
    }

    job.setJarByClass(ClueWebTRECIdFileExtractor.class);
    job.setJobName(ClueWebTRECIdFileExtractor.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // input
    job.setInputFormatClass(WARCInputFormat.class);

    // output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    // load IDs to be searched for
    job.getConfiguration().set(MAPREDUCE_MAPPER_TREC_IDS, loadTrecIds(args[2]));

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: dm_p2_mr.DM_P2_MR.java

public static void main(String[] args) throws Exception {
    generateLinkedHashMap("cho.txt");
    while (iter < 10) {
        if (iter == 0) {
            // first iteration: seed the configuration with the initial centroids
            Configuration confg = new Configuration();
            for (int i = 0; i < init_centroids.length; i++) {
                List<Double> exps = linkedHashMap.get(Integer.parseInt(init_centroids[i]));
                StringBuilder temp = new StringBuilder();
                for (int k = 0; k < exps.size(); k++) {
                    temp.append(exps.get(k)); // k-th expression value (the original appended exps.get(i))
                    temp.append(" ");
                }
                confg.set(String.valueOf(i + 1), temp.toString());
            }
            Job job = Job.getInstance(confg);
            job.setJobName("mapreduce");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setMapperClass(kmapper.class);
            job.setReducerClass(kreducer.class);
            String inputPath = "/input";
            String outputPath = "/output";
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            job.waitForCompletion(true);
        } else {
            // later iterations: read the centroids written by the previous reduce pass
            Configuration confg = new Configuration();
            FileSystem fOpen = FileSystem.get(confg);
            Path outputPathReduceFile = new Path("/output/part-r-00000");
            BufferedReader reader = new BufferedReader(new InputStreamReader(fOpen.open(outputPathReduceFile)));
            String line = reader.readLine();
            while (line != null) {
                String[] split = line.split(":");
                confg.set(split[0], split[1]);
                line = reader.readLine();
            }
            Job job = Job.getInstance(confg);
            job.setJobName("mapreduce");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            job.setMapperClass(kmapper.class);
            job.setReducerClass(kreducer.class);
            String inputPath = "/input";
            String outputPath = "/output";
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            job.waitForCompletion(true);
        }
        iter++;
    }
}
From source file: edu.gslis.ts.hadoop.CollectionStats.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    String inputPath = args[1];

    Configuration config = HBaseConfiguration.create(getConf());
    Job job = Job.getInstance(config);
    job.setJarByClass(CollectionStats.class);

    job.setInputFormatClass(ThriftFileInputFormat.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableReducerJob(tableName, ThriftFilterReducer.class, job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(MapWritable.class);
    job.setMapperClass(ThriftFilterMapper.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }

    return 0;
}
From source file: edu.gslis.ts.hadoop.HbaseRowCount.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    Path outputPath = new Path(args[1]);

    Configuration config = HBaseConfiguration.create(getConf());
    Job job = Job.getInstance(config);
    job.setJarByClass(HbaseRowCount.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, HbaseCountTableMapper.class,
            Text.class,        // mapper output key
            IntWritable.class, // mapper output value
            job);

    job.setReducerClass(HbaseCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, outputPath);

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }

    return 0;
}
From source file: edu.gslis.ts.hadoop.HbaseWordCount.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    Path outputPath = new Path(args[1]);

    Configuration config = HBaseConfiguration.create(getConf());
    Job job = Job.getInstance(config);
    job.setJarByClass(HbaseWordCount.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, HbaseWcTableMapper.class,
            Text.class,        // mapper output key
            IntWritable.class, // mapper output value
            job);

    job.setReducerClass(HbaseWcReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, outputPath);

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }

    return 0;
}
From source file: edu.gslis.ts.hadoop.ThriftBulkLoader.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    String inputPath = args[1];
    String outputPath = args[2];
    Path topicsFile = new Path(args[3]);
    Path vocabFile = new Path(args[4]);
    Path dateBinFile = new Path(args[5]);

    Configuration config = getConf();
    config.set("hbase.table.name", tableName);
    HBaseConfiguration.addHbaseResources(config);

    Job job = Job.getInstance(config);
    job.setJarByClass(ThriftBulkLoader.class);
    job.setJobName("Bulk Loading HBase Table::" + tableName);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapperClass(ThriftFilterMapper.class);

    Path output = new Path(outputPath);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapOutputValueClass(Put.class);

    job.addCacheFile(topicsFile.toUri());
    job.addCacheFile(vocabFile.toUri());
    job.addCacheFile(dateBinFile.toUri());

    job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
    job.getConfiguration().setClass("mapred.map.output.compression.codec",
            org.apache.hadoop.io.compress.SnappyCodec.class,
            org.apache.hadoop.io.compress.CompressionCodec.class);
    job.getConfiguration().set("hfile.compression", Compression.Algorithm.SNAPPY.getName());

    //RegionLocator regionLocator = conn.getRegionLocator(tableName);
    //HFileOutputFormat2.configureIncrementalLoad(job, new HTable(config,tableName));

    Connection con = ConnectionFactory.createConnection(config);
    TableName htableName = TableName.valueOf(tableName);
    HFileOutputFormat2.configureIncrementalLoad(job, con.getTable(htableName),
            con.getRegionLocator(htableName));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        // Couldn't find a better way to do this. The LoadIncrementalHFiles
        // seems to want 777 permissions on the output directory.
        try {
            Runtime rt = Runtime.getRuntime();
            rt.exec("hadoop fs -chmod -R 777 " + output);
        } catch (Exception e) {
            e.printStackTrace();
        }
        /*
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        HTable htable = new HTable(config, tableName);
        loader.doBulkLoad(new Path(outputPath), htable);
        */
    } else {
        throw new IOException("error with job");
    }

    return 0;

    // alternative bulk-load approach, left commented out:
    /*
    Job job = Job.getInstance(config);
    job.setJarByClass(ThriftBulkLoader.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    //HFileOutputFormat2.configureIncrementalLoad(job, htable);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.addCacheFile(topicsFile.toUri());
    job.addCacheFile(vocabFile.toUri());
    job.setMapperClass(ThriftFilterMapper.class);

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job");
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
    loader.doBulkLoad(new Path(outputPath), htable);

    return 0;
    */
}