List of usage examples for org.apache.hadoop.mapreduce Job getInstance
@Deprecated public static Job getInstance(Cluster ignored) throws IOException
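For orientation, here is a minimal sketch of the usual driver pattern. It is an illustration, not taken from the examples below: it uses the non-deprecated Job.getInstance(Configuration, String) overload instead of the deprecated getInstance(Cluster) signature above, Hadoop's identity Mapper/Reducer, and a hypothetical MinimalGetInstanceDriver class name.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalGetInstanceDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(Configuration) / getInstance(Configuration, String) are the
        // non-deprecated replacements for getInstance(Cluster) and the new Job(conf) constructor.
        Job job = Job.getInstance(conf, "minimal getInstance example");
        job.setJarByClass(MinimalGetInstanceDriver.class);
        // Identity Mapper/Reducer: with the default TextInputFormat the map keys are
        // LongWritable byte offsets and the values are Text lines, passed through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // must not already exist
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples that follow perform essentially the same setup, most of them inside a Tool.run(String[]) method so that GenericOptionsParser/ToolRunner can handle generic Hadoop options.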
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.statistics.WARCRecordCounter.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    System.out.println("Other args: " + Arrays.toString(otherArgs));

    Job job = Job.getInstance(conf);
    job.setJarByClass(WARCRecordCounter.class);
    job.setJobName(WARCRecordCounter.class.getName());

    // mapper
    job.setMapperClass(ResponseMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // combiner + reducer
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.PagesByURLExtractor.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    for (Map.Entry<String, String> next : job.getConfiguration()) {
        System.out.println(next.getKey() + ": " + next.getValue());
    }

    job.setJarByClass(PagesByURLExtractor.class);
    job.setJobName(PagesByURLExtractor.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // input
    job.setInputFormatClass(WARCInputFormat.class);

    // output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    // load IDs to be searched for
    job.getConfiguration().set(MAPREDUCE_MAPPER_URLS, loadURLs(args[2]));

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.TextToSentencesSplitter.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf);
    job.setJarByClass(TextToSentencesSplitter.class);
    job.setJobName(TextToSentencesSplitter.class.getName());

    // mapper
    job.setMapperClass(TextToSentencesSplitter.MapperClass.class);
    job.setInputFormatClass(WARCInputFormat.class);

    // reducer
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.URIExtractor.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    // set from the command line
    job.setJarByClass(URIExtractor.class);
    job.setJobName(URIExtractor.class.getName());

    // mapper
    job.setMapperClass(URIExtractorMapper.class);
    job.setReducerClass(URIExtractorReducer.class);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    // is necessary, so that Hadoop does not mix the map input format up.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.experiments.dip.hadoop.ClueWebTRECIdFileExtractor.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    for (Map.Entry<String, String> next : job.getConfiguration()) {
        System.out.println(next.getKey() + ": " + next.getValue());
    }

    job.setJarByClass(ClueWebTRECIdFileExtractor.class);
    job.setJobName(ClueWebTRECIdFileExtractor.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // input
    job.setInputFormatClass(WARCInputFormat.class);

    // output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    // load IDs to be searched for
    job.getConfiguration().set(MAPREDUCE_MAPPER_TREC_IDS, loadTrecIds(args[2]));

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: dm_p2_mr.DM_P2_MR.java

public static void main(String[] args) throws Exception {
    generateLinkedHashMap("cho.txt");
    while (iter < 10) {
        if (iter == 0) {
            // first iteration: seed the configuration with the initial centroids
            Configuration confg = new Configuration();
            for (int i = 0; i < init_centroids.length; i++) {
                List<Double> exps = linkedHashMap.get(Integer.parseInt(init_centroids[i]));
                StringBuilder temp = new StringBuilder();
                for (int k = 0; k < exps.size(); k++) {
                    temp.append(exps.get(k)); // k-th expression value (the original appended exps.get(i))
                    temp.append(" ");
                }
                confg.set(String.valueOf(i + 1), temp.toString());
            }
            Job job = Job.getInstance(confg);
            job.setJobName("mapreduce");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setMapperClass(kmapper.class);
            job.setReducerClass(kreducer.class);
            String inputPath = "/input";
            String outputPath = "/output";
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            job.waitForCompletion(true);
        } else {
            // later iterations: read the centroids written by the previous reduce pass
            Configuration confg = new Configuration();
            FileSystem fOpen = FileSystem.get(confg);
            Path outputPathReduceFile = new Path("/output/part-r-00000");
            BufferedReader reader = new BufferedReader(new InputStreamReader(fOpen.open(outputPathReduceFile)));
            String line = reader.readLine();
            while (line != null) {
                String[] split = line.split(":");
                confg.set(split[0], split[1]);
                line = reader.readLine();
            }
            Job job = Job.getInstance(confg);
            job.setJobName("mapreduce");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            job.setMapperClass(kmapper.class);
            job.setReducerClass(kreducer.class);
            String inputPath = "/input";
            String outputPath = "/output";
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            job.waitForCompletion(true);
        }
        iter++;
    }
}
From source file: edu.gslis.ts.hadoop.CollectionStats.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    String inputPath = args[1];

    Configuration config = HBaseConfiguration.create(getConf());
    Job job = Job.getInstance(config);
    job.setJarByClass(CollectionStats.class);

    job.setInputFormatClass(ThriftFileInputFormat.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableReducerJob(tableName, ThriftFilterReducer.class, job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(MapWritable.class);
    job.setMapperClass(ThriftFilterMapper.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }

    return 0;
}
From source file: edu.gslis.ts.hadoop.HbaseRowCount.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    Path outputPath = new Path(args[1]);

    Configuration config = HBaseConfiguration.create(getConf());
    Job job = Job.getInstance(config);
    job.setJarByClass(HbaseRowCount.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, HbaseCountTableMapper.class,
            Text.class,        // mapper output key
            IntWritable.class, // mapper output value
            job);

    job.setReducerClass(HbaseCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, outputPath);

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }

    return 0;
}
From source file: edu.gslis.ts.hadoop.HbaseWordCount.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    Path outputPath = new Path(args[1]);

    Configuration config = HBaseConfiguration.create(getConf());
    Job job = Job.getInstance(config);
    job.setJarByClass(HbaseWordCount.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, HbaseWcTableMapper.class,
            Text.class,        // mapper output key
            IntWritable.class, // mapper output value
            job);

    job.setReducerClass(HbaseWcReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, outputPath);

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }

    return 0;
}
From source file: edu.gslis.ts.hadoop.ThriftBulkLoader.java
License: Apache License

public int run(String[] args) throws Exception {
    String tableName = args[0];
    String inputPath = args[1];
    String outputPath = args[2];
    Path topicsFile = new Path(args[3]);
    Path vocabFile = new Path(args[4]);
    Path dateBinFile = new Path(args[5]);

    Configuration config = getConf();
    config.set("hbase.table.name", tableName);
    HBaseConfiguration.addHbaseResources(config);

    Job job = Job.getInstance(config);
    job.setJarByClass(ThriftBulkLoader.class);
    job.setJobName("Bulk Loading HBase Table::" + tableName);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapperClass(ThriftFilterMapper.class);

    Path output = new Path(outputPath);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapOutputValueClass(Put.class);

    job.addCacheFile(topicsFile.toUri());
    job.addCacheFile(vocabFile.toUri());
    job.addCacheFile(dateBinFile.toUri());

    job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
    job.getConfiguration().setClass("mapred.map.output.compression.codec",
            org.apache.hadoop.io.compress.SnappyCodec.class,
            org.apache.hadoop.io.compress.CompressionCodec.class);
    job.getConfiguration().set("hfile.compression", Compression.Algorithm.SNAPPY.getName());

    //RegionLocator regionLocator = conn.getRegionLocator(tableName);
    //HFileOutputFormat2.configureIncrementalLoad(job, new HTable(config,tableName));

    Connection con = ConnectionFactory.createConnection(config);
    TableName htableName = TableName.valueOf(tableName);
    HFileOutputFormat2.configureIncrementalLoad(job, con.getTable(htableName),
            con.getRegionLocator(htableName));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        // Couldn't find a better way to do this. The LoadIncrementalHFiles
        // seems to want 777 permissions on the output directory.
        try {
            Runtime rt = Runtime.getRuntime();
            rt.exec("hadoop fs -chmod -R 777 " + output);
        } catch (Exception e) {
            e.printStackTrace();
        }
        /*
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        HTable htable = new HTable(config, tableName);
        loader.doBulkLoad(new Path(outputPath), htable);
        */
    } else {
        throw new IOException("error with job");
    }

    return 0;

    // alternative bulk-load approach, left commented out:
    /*
    Job job = Job.getInstance(config);
    job.setJarByClass(ThriftBulkLoader.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    //HFileOutputFormat2.configureIncrementalLoad(job, htable);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.addCacheFile(topicsFile.toUri());
    job.addCacheFile(vocabFile.toUri());
    job.setMapperClass(ThriftFilterMapper.class);

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job");
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
    loader.doBulkLoad(new Path(outputPath), htable);

    return 0;
    */
}