Example usage for org.apache.hadoop.mapreduce Job getInstance

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getInstance.

Prototype

@Deprecated
public static Job getInstance(Cluster ignored) throws IOException 

Document

Creates a new Job with no particular Cluster.
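
This particular overload is deprecated. In current Hadoop releases the Configuration-based overloads are the usual entry points; a minimal sketch (the class and job name below are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetInstanceSketch {
    public static void main(String[] args) throws IOException {
        // No-arg overload: backed by a fresh Configuration with default resources
        Job defaultJob = Job.getInstance();

        // Configuration-based overload, optionally with a job name
        Configuration conf = new Configuration();
        Job namedJob = Job.getInstance(conf, "example-job");
        System.out.println(namedJob.getJobName());
    }
}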

Usage

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

/**
 * Tests that, when given an input directory, {@link SSTableInputFormat} correctly expands
 * all subdirectories and picks up all the data tables corresponding to a specific
 * column family.
 */
@Test
public void testListStatusWithColumnFamilyName() throws IOException {
    String cfName = "col_fam";
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    SSTableInputFormat.setColumnFamilyName(cfName, job);
    List<FileStatus> result = testListStatus(conf, "./src/test/resources/input/");
    assertEquals(NUM_TABLES * NUM_TOKENS, result.size());
}
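
For context, these setters are intended to be called from a job driver before the input is listed. A minimal driver sketch follows, in which the concrete input format subclass, keyspace, and paths are assumptions for illustration, not the project's actual names:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import com.knewton.mapreduce.io.SSTableInputFormat;

public class SSTableDriverSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        SSTableInputFormat.setKeyspaceName("my_keyspace", job);  // hypothetical keyspace
        SSTableInputFormat.setColumnFamilyName("col_fam", job);
        // Assumed concrete subclass of SSTableInputFormat; substitute the real one
        job.setInputFormatClass(SSTableColumnInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/sstables/input")); // illustrative path
    }
}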

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

/**
 * Tests that, when given an input directory, {@link SSTableInputFormat} correctly expands
 * all subdirectories and picks up all the data tables corresponding to a specific
 * column family when a SNAP directory exists. The SST tables should be skipped.
 */
@Test
public void testListStatusWithColumnFamilyNameSkipSST() throws Exception {
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    SSTableInputFormat.setColumnFamilyName("col_fam", job);
    List<FileStatus> result = testListStatus(conf, "./src/test/resources/backup_input");
    assertEquals(NUM_TABLES, result.size());
}

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

@Test
public void testSetComparatorClass() throws Exception {
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    String comparator = "my_comparator";
    SSTableInputFormat.setComparatorClass(comparator, job);
    assertEquals(comparator, conf.get(PropertyConstants.COLUMN_COMPARATOR.txt));
}

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

@Test
public void testSetSubComparatorClass() throws Exception {
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    String subComparator = "my_subcomparator";
    SSTableInputFormat.setSubComparatorClass(subComparator, job);
    assertEquals(subComparator, conf.get(PropertyConstants.COLUMN_SUBCOMPARATOR.txt));
}

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

@Test
public void testPartitionerClass() throws Exception {
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    String partitioner = "my_partitioner";
    SSTableInputFormat.setPartitionerClass(partitioner, job);
    assertEquals(partitioner, conf.get(PropertyConstants.PARTITIONER.txt));
}

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

@Test
public void testColumnFamilyType() throws Exception {
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    String cfType = "my_cftype";
    SSTableInputFormat.setColumnFamilyType(cfType, job);
    assertEquals(cfType, conf.get(PropertyConstants.COLUMN_FAMILY_TYPE.txt));
}

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

@Test
public void testSetColumnFamilyName() throws Exception {
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    String cfName = "my_cfName";
    SSTableInputFormat.setColumnFamilyName(cfName, job);
    assertEquals(cfName, conf.get(PropertyConstants.COLUMN_FAMILY_NAME.txt));
}

From source file: com.knewton.mapreduce.io.SSTableInputFormatTest.java

License: Apache License

@Test
public void testSetKeyspaceName() throws Exception {
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    String keyspaceName = "my_keyspaceName";
    SSTableInputFormat.setKeyspaceName(keyspaceName, job);
    assertEquals(keyspaceName, conf.get(PropertyConstants.KEYSPACE_NAME.txt));
}

From source file: com.linkedin.hadoop.example.WordCountCounters.java

License: Apache License

/**
 * Azkaban will look for a method named `run` to start your job. Use this method to set up all
 * the Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
 */
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}
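
The INPUT_WORDS counter read near the end of run() is populated map-side. A minimal sketch of how the mapper might increment it, assuming WordCountCounters is an enum declared in the project (the mapper body is illustrative, not the project's actual code):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1L);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String token : value.toString().split("\\s+")) {
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
            // Each emitted word also bumps the enum-backed custom counter read back in run()
            context.getCounter(WordCountCounters.INPUT_WORDS).increment(1L);
        }
    }
}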

From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java

License: Apache License

private void init(String indexType) throws IOException {
    conf = new Configuration();
    job = Job.getInstance(conf);
    fakeTaskAttemptContext = mock(TaskAttemptContext.class);
    outputFormat = new JsonPinotOutputFormat();
    outputTempDir = Files.createTempDirectory(PinotOutputFormatTest.class.getName() + indexType + "_io_output")
            .toFile();
    File workingTempDir = Files
            .createTempDirectory(PinotOutputFormatTest.class.getName() + indexType + "_io_working_dir")
            .toFile();
    // output path
    Path outDir = new Path(outputTempDir.getAbsolutePath());
    PinotOutputFormat.setOutputPath(job, outDir);
    PinotOutputFormat.setTableName(job, "emp");
    PinotOutputFormat.setSegmentName(job, indexType + "segment_one");
    PinotOutputFormat.setTempSegmentDir(job, workingTempDir.getAbsolutePath());

    Schema schema = Schema.fromString(getSchema());
    PinotOutputFormat.setSchema(job, schema);
    mockTaskAttemptContext(indexType);
    segmentTarPath = "_temporary/0/_temporary/attempt_foo_task_" + indexType
            + "_0123_r_000002_2/part-r-00002/segmentTar";
}
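
After init, a test would typically drive the format through the standard org.apache.hadoop.mapreduce.OutputFormat contract. A sketch of that follow-up, in which the key/value generics and the record object are assumptions for illustration, not Pinot's exact API:

// Sketch only: generics and payload are assumptions about the Pinot test API
RecordWriter<Object, Object> writer = outputFormat.getRecordWriter(fakeTaskAttemptContext);
writer.write(null, employeeRecord); // employeeRecord: hypothetical object matching getSchema()
writer.close(fakeTaskAttemptContext);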