List of usage examples for org.apache.hadoop.mapreduce.Job.getInstance
@Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException
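The examples below come from different projects, but they all follow the same basic pattern around Job.getInstance (most use the Job.getInstance(Configuration conf, String jobName) overload rather than the deprecated Cluster variant above). As a point of reference, here is a minimal, self-contained sketch of that pattern; it uses Hadoop's identity Mapper and Reducer and takes the input and output paths from the command line, so none of the class names below are taken from the projects that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalGetInstanceExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Create the job from a configuration plus a human-readable job name.
        Job job = Job.getInstance(conf, "minimal getInstance example");
        job.setJarByClass(MinimalGetInstanceExample.class);

        // Identity mapper and reducer: records pass through unchanged.
        // With the default TextInputFormat the keys are byte offsets (LongWritable)
        // and the values are lines of text (Text).
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations come from the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Block until the job finishes and exit with its status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The project examples that follow swap in their own mappers, reducers, input/output formats, and job-control logic, but the Job.getInstance call itself stays the same.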
From source file:com.mycompany.keywordsearch.KeywordSearch.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(FileInputFormat.INPUT_DIR_RECURSIVE, String.valueOf(true));
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    System.out.print("Keyword:\t");
    conf.set(KEYWORD, in.readLine());

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(KeywordSearch.class);
    job.setInputFormatClass(TextInputFormatV2.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    clearOutput(conf, output);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.mycompany.searcher.Searcher.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    System.out.print("Please input a keyword:\t");
    conf.set(KEYWORD, in.readLine());
    conf.set(MINIMUM, args[2]);

    Job job = Job.getInstance(conf, "keyword search");
    job.setJarByClass(Searcher.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    clearOutput(conf, new Path(args[1]));
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
    TimeUnit.SECONDS.sleep(1);

    in = new BufferedReader(
            new InputStreamReader(FileSystem.get(conf).open(new Path(args[1] + "/part-r-00000")), "UTF-8"));
    String line;
    HashMap<String, Integer> map = new HashMap<String, Integer>();
    while ((line = in.readLine()) != null) {
        StringTokenizer tok = new StringTokenizer(line);
        map.put(tok.nextToken(), Integer.valueOf(tok.nextToken()));
    }

    List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
    Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
        public int compare(Map.Entry<String, Integer> entry1, Map.Entry<String, Integer> entry2) {
            return (entry2.getValue() - entry1.getValue());
        }
    });

    for (Map.Entry<String, Integer> entry : list) {
        in = new BufferedReader(
                new InputStreamReader(FileSystem.get(conf).open(new Path(entry.getKey())), "UTF-8"));
        System.out.println("\n" + in.readLine());
        System.out.println("\n" + in.readLine() + ":" + entry.getValue() + "\n");
    }
}
From source file:com.netflix.bdp.s3.TestMRJob.java
License:Apache License
@Test
public void testMRJob() throws Exception {
    FileSystem mockS3 = mock(FileSystem.class);
    FileSystem s3 = S3_OUTPUT_PATH.getFileSystem(getConfiguration());
    if (s3 instanceof MockS3FileSystem) {
        ((MockS3FileSystem) s3).setMock(mockS3);
    } else {
        throw new RuntimeException("Cannot continue: S3 not mocked");
    }

    String commitUUID = UUID.randomUUID().toString();

    int numFiles = 3;
    Set<String> expectedFiles = Sets.newHashSet();
    for (int i = 0; i < numFiles; i += 1) {
        File file = temp.newFile(String.valueOf(i) + ".text");
        try (FileOutputStream out = new FileOutputStream(file)) {
            out.write(("file " + i).getBytes(StandardCharsets.UTF_8));
        }
        expectedFiles.add(new Path(S3_OUTPUT_PATH, "part-m-0000" + i + "-" + commitUUID).toString());
    }

    Job mrJob = Job.getInstance(MR_CLUSTER.getConfig(), "test-committer-job");
    Configuration conf = mrJob.getConfiguration();

    mrJob.setOutputFormatClass(S3TextOutputFormat.class);
    S3TextOutputFormat.setOutputPath(mrJob, S3_OUTPUT_PATH);

    File mockResultsFile = temp.newFile("committer.bin");
    mockResultsFile.delete();
    String committerPath = "file:" + mockResultsFile;
    conf.set("mock-results-file", committerPath);
    conf.set(UPLOAD_UUID, commitUUID);

    mrJob.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(mrJob, new Path("file:" + temp.getRoot().toString()));

    mrJob.setMapperClass(M.class);
    mrJob.setNumReduceTasks(0);

    mrJob.submit();
    Assert.assertTrue("MR job should succeed", mrJob.waitForCompletion(true));

    TestUtil.ClientResults results;
    try (ObjectInputStream in = new ObjectInputStream(
            FileSystem.getLocal(conf).open(new Path(committerPath)))) {
        results = (TestUtil.ClientResults) in.readObject();
    }

    Assert.assertEquals("Should not delete files", 0, results.deletes.size());
    Assert.assertEquals("Should not abort commits", 0, results.aborts.size());
    Assert.assertEquals("Should commit task output files", numFiles, results.commits.size());

    Set<String> actualFiles = Sets.newHashSet();
    for (CompleteMultipartUploadRequest commit : results.commits) {
        actualFiles.add("s3://" + commit.getBucketName() + "/" + commit.getKey());
    }

    Assert.assertEquals("Should commit the correct file paths", expectedFiles, actualFiles);
}
From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java
License:Apache License
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();

    // First job: UserIdMapper with IntSumReducer as combiner and reducer.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Second job: reads the first job's output.
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // Wrap each Job in a ControlledJob.
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    // job2 must wait for job1 to finish.
    controlledJob2.addDependingJob(controlledJob1);

    // Group both jobs under a JobControl.
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // Run the JobControl in its own thread and poll until all jobs finish.
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
    }
}
From source file:com.niuwa.hadoop.jobs.sample.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    args = new String[] { "hdfs://192.168.101.219:9000/user/root/input",
            "hdfs://192.168.101.219:9000/user/root/output/count" + new Date().getTime() };
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.pinterest.terrapin.hadoop.examples.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    TerrapinUploaderOptions options = TerrapinUploaderOptions.initFromSystemProperties();

    // Create the job, setting the inputs and map output key and map output value classes.
    // Also, set reducer and mapper.
    Job job = Job.getInstance(super.getConf(), "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));

    // Wrap around Hadoop Loader job to write the data to a terrapin fileset.
    return new HadoopJobLoader(options, job).waitForCompletion() ? 0 : 1;
}
From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java
License:Open Source License
/**
 * This method assumes fs.default.name is passed as args[0].
 *
 * @param args
 * @return
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");
    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);

    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");

    job.setNumReduceTasks(1);
    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }

    return jobSuccess ? 0 : 1;
}
From source file:com.rw.legion.DefaultJob.java
License:Apache License
/**
 * Main method.
 *
 * @param args Arguments should be: 1) input path, 2) output path, 3)
 * location of Legion objective file.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Load the Legion objective from the JSON doc.
    Path path = new Path(args[2]);
    FileSystem fs = FileSystem.get(new URI(args[2]), conf);
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));

    String json = "";
    String line = br.readLine();
    while (line != null) {
        json += line;
        line = br.readLine();
    }
    br.close();

    /*
     * Save the JSON for the Legion objective to the Hadoop configuration,
     * so we can access it in other containers.
     */
    conf.setStrings("legion_objective", json);

    // De-serialize the objective so we can access the settings here.
    LegionObjective legionObjective = ObjectiveDeserializer.deserialize(json);

    // Start configuring the MapReduce job.
    Job hadoopJob = Job.getInstance(conf, "Legion");
    hadoopJob.setJarByClass(DefaultJob.class);
    hadoopJob.setMapperClass(DefaultMapper.class);
    LazyOutputFormat.setOutputFormatClass(hadoopJob, TextOutputFormat.class);

    // Compress the output to speed things up.
    TextOutputFormat.setCompressOutput(hadoopJob, true);
    TextOutputFormat.setOutputCompressorClass(hadoopJob, GzipCodec.class);

    // What input format do we use?
    try {
        @SuppressWarnings("unchecked")
        Class<? extends FileInputFormat<NullWritable, LegionRecord>> inputClass =
                (Class<? extends FileInputFormat<NullWritable, LegionRecord>>) Class
                        .forName(legionObjective.getInputFormat());
        hadoopJob.setInputFormatClass(inputClass);
    } catch (Exception e) {
        throw new JsonParseException("Problem loading input format class '"
                + legionObjective.getInputFormat() + "'");
    }

    // Should we set a max combined size?
    if (legionObjective.getMaxCombinedSize() != null) {
        CombineFileInputFormat.setMaxInputSplitSize(hadoopJob, legionObjective.getMaxCombinedSize());
    }

    /*
     * These are just static convenience methods, so it doesn't matter if
     * they come from the wrong class.
     */
    FileInputFormat.setInputDirRecursive(hadoopJob, true);
    FileInputFormat.addInputPath(hadoopJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(hadoopJob, new Path(args[1]));

    // A Legion objective can specify multiple output tables; register each one as a named output.
    for (OutputTable outputTable : legionObjective.getOutputTables()) {
        MultipleOutputs.addNamedOutput(hadoopJob, outputTable.getTitle(), TextOutputFormat.class,
                NullWritable.class, Text.class);
    }

    MultipleOutputs.addNamedOutput(hadoopJob, "skipped", TextOutputFormat.class, NullWritable.class,
            Text.class);

    hadoopJob.waitForCompletion(true);
}
From source file:com.sa.npopa.samples.hbase.FindBadMOBReferences.java
License:Apache License
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(FindBadMOBReferences.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    scan.addFamily(Bytes.toBytes("J"));
    //scan.setRowPrefixFilter(Bytes.toBytes("a00"));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // job.setOutputFormatClass(NullOutputFormat.class);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, FindBadMOBReferencesMapper.class, Text.class,
            Text.class, job);

    //job.setNumReduceTasks(0);
    job.setReducerClass(FindBadMOBReferencesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));

    return job;
}
From source file:com.sa.npopa.samples.hbase.myMR.java
License:Apache License
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(myMR.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);
    //scan.setFilter(new FirstKeyOnlyFilter()); // need to find another filter like key only.
    scan.setFilter(new KeyOnlyFilter());

    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);

    return job;
}