Example usage for org.apache.hadoop.mapreduce Job getInstance

List of usage examples for org.apache.hadoop.mapreduce Job getInstance

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getInstance.

Prototype

@Deprecated
public static Job getInstance(Cluster ignored) throws IOException 

Source Link

Document

Creates a new Job with no particular Cluster.

Usage

From source file:com.pagerankcalculator.TwitterPageRank.java

/**
 * Graph Parsing/*ww w.  j a  va 2s . c om*/
 * Memasukan data mentah dan melakukan inisialisasi pagerank
 * 
 * @param in file data masukan
 * @param out direktori output
 */
public int parseGraph(String in, String out) throws IOException, InterruptedException, ClassNotFoundException {

    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(GraphParsingMapper.class);
    job.setReducerClass(GraphParsingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pagerankcalculator.TwitterPageRank.java

/**
 * Runs one PageRank calculation iteration (job #2).
 *
 * <p>Any existing output directory is deleted recursively before the job is
 * submitted.
 *
 * @param in input path for this iteration
 * @param out output directory for this iteration
 * @param iteration iteration number, used only in the job name
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IOException if the job cannot be created or the file system cannot be accessed
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    // FileSystem.newInstance() hands back a fresh instance owned by this
    // caller, so it must be closed here to avoid leaking it.
    try (FileSystem fs = FileSystem.newInstance(getConf())) {
        if (fs.exists(outputFilePath)) {
            fs.delete(outputFilePath, true);
        }
    }

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pagerankcalculator.TwitterPageRank.java

/**
 * Sorts the computed PageRank values (job #3).
 *
 * <p>The map output is keyed by {@code DoubleWritable} and ordered with
 * {@code DoubleSortDescComparator} (presumably descending, going by its name);
 * a single reducer is used so that one output is produced. Any existing output
 * directory is deleted recursively before the job is submitted.
 *
 * @param in input path holding the PageRank results
 * @param out output directory for the sorted result
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IOException if the job cannot be created or the file system cannot be accessed
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public int sortPagerank(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(1);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setSortComparatorClass(DoubleSortDescComparator.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    // FileSystem.newInstance() hands back a fresh instance owned by this
    // caller, so it must be closed here to avoid leaking it.
    try (FileSystem fs = FileSystem.newInstance(getConf())) {
        if (fs.exists(outputFilePath)) {
            fs.delete(outputFilePath, true);
        }
    }

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.phantom.hadoop.examples.terasort.TeraChecksum.java

License:Apache License

/**
 * Configures and runs the "TeraSum" job.
 *
 * @param args exactly two entries: the input path and the output path
 * @return 2 (after printing usage) when the argument count is wrong,
 *         otherwise 0 if the job succeeded and 1 if it did not
 * @throws Exception if the job cannot be created, configured, or run
 */
public int run(String[] args) throws Exception {
    Job checksumJob = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }

    // Input and output locations.
    Path input = new Path(args[0]);
    TeraInputFormat.setInputPaths(checksumJob, input);
    Path output = new Path(args[1]);
    FileOutputFormat.setOutputPath(checksumJob, output);

    // Job identity.
    checksumJob.setJobName("TeraSum");
    checksumJob.setJarByClass(TeraChecksum.class);

    // Map/reduce wiring and output types.
    checksumJob.setMapperClass(ChecksumMapper.class);
    checksumJob.setReducerClass(ChecksumReducer.class);
    checksumJob.setOutputKeyClass(NullWritable.class);
    checksumJob.setOutputValueClass(Unsigned16.class);

    // A single reducer is forced.
    checksumJob.setNumReduceTasks(1);
    checksumJob.setInputFormatClass(TeraInputFormat.class);

    if (checksumJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.phantom.hadoop.examples.terasort.TeraGen.java

License:Apache License

/**
 * @param args/*from w w  w.j a v  a 2s . c  om*/
 *            the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.phantom.hadoop.examples.terasort.TeraSort.java

License:Apache License

/**
 * Configures and runs the "TeraSort" job.
 *
 * <p>Unless the simple partitioner is selected, a partition file is written
 * under the output directory and registered as a cache file, and
 * {@code TotalOrderPartitioner} is used.
 *
 * @param args the input directory followed by the output directory
 * @return -1 if the partition file could not be written, otherwise 0 if the
 *         job succeeded and 1 if it did not
 * @throws Exception if the job cannot be created, configured, or run
 */
public int run(String[] args) throws Exception {
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        // The URI fragment names the cache entry after the partition file.
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            // Pass the throwable itself so the stack trace and cause are
            // logged, not just the (possibly null) message.
            LOG.error(e.getMessage(), e);
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

From source file:com.phantom.hadoop.examples.terasort.TeraValidate.java

License:Apache License

/**
 * Configures and runs the "TeraValidate" job.
 *
 * @param args exactly two entries: the input path and the output path
 * @return 1 (after printing usage) when the argument count is wrong,
 *         otherwise 0 if the job succeeded and 1 if it did not
 * @throws Exception if the job cannot be created, configured, or run
 */
public int run(String[] args) throws Exception {
    Job validateJob = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }

    // Input and output locations.
    Path input = new Path(args[0]);
    TeraInputFormat.setInputPaths(validateJob, input);
    Path output = new Path(args[1]);
    FileOutputFormat.setOutputPath(validateJob, output);

    // Job identity.
    validateJob.setJobName("TeraValidate");
    validateJob.setJarByClass(TeraValidate.class);

    // Map/reduce wiring and output types.
    validateJob.setMapperClass(ValidateMapper.class);
    validateJob.setReducerClass(ValidateReducer.class);
    validateJob.setOutputKeyClass(Text.class);
    validateJob.setOutputValueClass(Text.class);

    // A single reducer is forced.
    validateJob.setNumReduceTasks(1);
    // A single split is forced by making the minimum split size as large as possible.
    FileInputFormat.setMinInputSplitSize(validateJob, Long.MAX_VALUE);
    validateJob.setInputFormatClass(TeraInputFormat.class);

    if (validateJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java

License:Open Source License

/**
 * Test driver: sets up the test data, runs a single MRBuildVertices job over
 * it, then runs the test cleanup.
 *
 * <p>The process exits with status 1 if the job fails and 0 otherwise.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    setupTest(conf);

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrbuildverticestest");
    MRBuildVertices.setupJob(buildJob, new Path(testInput), new Path(testOutput));

    // NOTE(review): Job.getInstance() works on a copy of the Configuration, so
    // a property set on conf at this point presumably does not reach buildJob.
    // Confirm whether this was meant to be set before the job was created.
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);

    boolean succeeded = buildJob.waitForCompletion(true);
    if (!succeeded) {
        System.exit(1);
    }

    cleanupTest(conf);
    System.exit(0);
}

From source file:com.philiphubbard.digraph.MRCompressChainsTest.java

License:Open Source License

/**
 * Test driver: sets up the test data, then runs MRCompressChains iteration
 * jobs until {@code MRCompressChains.continueIteration} reports that no
 * further iteration is needed, prints the iteration count, and runs the test
 * cleanup.
 *
 * <p>The process exits with status 1 as soon as a job fails and 0 otherwise.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    setupTest(conf);

    String inputOrig = testInput;
    String outputOrig = testOutput;

    int iter = 0;
    MRCompressChains.beginIteration();

    // At least one iteration always runs; each pass uses a fresh Job.
    boolean keepGoing;
    do {
        Job iterationJob = Job.getInstance(conf);
        iterationJob.setJobName("mrcompresschainstest");

        Path input = new Path(inputOrig);
        Path output = new Path(outputOrig);
        MRCompressChains.setupIterationJob(iterationJob, input, output);

        boolean succeeded = iterationJob.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }

        iter += 1;
        keepGoing = MRCompressChains.continueIteration(iterationJob, input, output);
    } while (keepGoing);

    System.out.println("Number of iterations = " + iter);

    cleanupTest(conf);
    System.exit(0);
}

From source file:com.philiphubbard.sabe.MRAssembler.java

License:Open Source License

/**
 * Runs the assembly pipeline: builds vertices from the input, repeatedly
 * compresses chains until no further iteration is requested, builds the
 * compressed graph, and writes each assembled sequence on its own line to
 * {@code outputPath}.
 *
 * <p>Intermediate results are kept under the temporary directory
 * {@code sabe.MRAssemblerTmp}, which is deleted once the pipeline has finished.
 *
 * @param inputPath input for the vertex-building job
 * @param outputPath file that receives the assembled sequences
 * @return {@code true} when the pipeline ran to the end; {@code false} if the
 *         vertex-building job or any compression job failed
 * @throws IOException if a job cannot be created or the file system cannot be accessed
 * @throws InterruptedException if waiting for a job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public boolean run(Path inputPath, Path outputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // Job.getInstance() copies the Configuration argument, so set its properties first.

    conf.setBoolean(MRVertex.CONFIG_ALLOW_EDGE_MULTIPLES, true);
    conf.setBoolean(MRVertex.CONFIG_COMPRESS_CHAIN_MULTIPLES_MUST_MATCH, false);
    conf.setInt(MRMerVertex.CONFIG_MER_LENGTH, vertexMerLength);
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    conf.setInt(MRBuildVertices.CONFIG_COVERAGE, coverage);
    conf.setInt(MRCompressChains.CONFIG_TERMINATION_COUNT, 1);

    // Stage 1: vertex construction.

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrassemblerbuild");

    Path buildInputPath = inputPath;
    Path buildOutputPath = new Path("sabe.MRAssemblerTmp");

    System.out.println("sabe.MRAssembler starting vertex construction");

    MRBuildMerVertices.setupJob(buildJob, buildInputPath, buildOutputPath);

    if (!buildJob.waitForCompletion(true))
        return false;

    // Stage 2: iterative chain compression.

    Path compressInputPath = new Path(buildOutputPath.toString() + "/chain");
    Path compressOutputPath = new Path(buildOutputPath.toString() + "/chainCompress");

    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();
    while (keepGoing) {
        Job compressJob = Job.getInstance(conf);
        compressJob.setJobName("mrassemblercompress");

        System.out.println("sabe.MRAssembler starting compression iteration " + iter);

        MRCompressMerChains.setupIterationJob(compressJob, compressInputPath, compressOutputPath);

        // Report failure through the return value, as the build stage does,
        // instead of terminating the whole JVM from inside this method.
        if (!compressJob.waitForCompletion(true))
            return false;

        iter++;
        keepGoing = MRCompressChains.continueIteration(compressJob, compressInputPath, compressOutputPath);
    }

    System.out.println("sabe.MRAssembler made " + iter + " compression iterations");

    // Stage 3: build the compressed graph and write the assembled sequences.

    Path branchPath = new Path(buildOutputPath.toString() + "/branch");
    Path chainPath = compressOutputPath;

    FileSystem fileSystem = FileSystem.get(conf);

    Graph graph = buildCompressedGraph(conf, fileSystem, branchPath, chainPath);
    if (graph != null) {
        ArrayList<String> result = graph.assemble();

        // try-with-resources closes the stream even if a write fails, so the
        // output is flushed and the handle is not leaked.
        try (FSDataOutputStream out = fileSystem.create(outputPath)) {
            for (String seq : result) {
                out.writeBytes(seq);
                out.writeBytes("\n");
            }
        }
    }

    // Remove the temporary working directory.

    fileSystem.delete(buildOutputPath, true);

    fileSystem.close();

    return true;
}