List of usage examples for org.apache.hadoop.mapreduce.Job.getInstance
@Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException
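The Cluster-based overload above is deprecated; the examples below all rely on the non-deprecated Job.getInstance(Configuration) or Job.getInstance(Configuration, String jobName) forms instead. As a minimal, self-contained sketch of that pattern (the driver class name and the input/output paths are placeholders, not taken from any of the source files below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PassThroughDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Non-deprecated factory method: a Configuration plus a human-readable job name.
        Job job = Job.getInstance(conf, "pass-through example");
        job.setJarByClass(PassThroughDriver.class);

        // No mapper/reducer is set, so the identity Mapper and Reducer are used;
        // input records (offset, line) are simply written back out.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Hypothetical HDFS paths, passed on the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}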
From source file:com.sa.npopa.samples.hbase.RowCounter.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String startKey = null;
    String endKey = null;
    long startTime = 0;
    long endTime = 0;

    StringBuilder sb = new StringBuilder();

    final String rangeSwitch = "--range=";
    final String startTimeArgKey = "--starttime=";
    final String endTimeArgKey = "--endtime=";
    final String expectedCountArg = "--expected-count=";

    // First argument is table name, starting from second
    for (int i = 1; i < args.length; i++) {
        if (args[i].startsWith(rangeSwitch)) {
            String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2);
            if (startEnd.length != 2 || startEnd[1].contains(",")) {
                printUsage("Please specify range in such format as \"--range=a,b\" "
                        + "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
                return null;
            }
            startKey = startEnd[0];
            endKey = startEnd[1];
            continue;
        }
        if (args[i].startsWith(startTimeArgKey)) {
            startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(endTimeArgKey)) {
            endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(expectedCountArg)) {
            conf.setLong(EXPECTED_COUNT_KEY, Long.parseLong(args[i].substring(expectedCountArg.length())));
            continue;
        }
        // if no switch, assume column names
        sb.append(args[i]);
        sb.append(" ");
    }
    if (endTime < startTime) {
        printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
        return null;
    }

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(RowCounter.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    if (startKey != null && !startKey.equals("")) {
        scan.setStartRow(Bytes.toBytes(startKey));
    }
    if (endKey != null && !endKey.equals("")) {
        scan.setStopRow(Bytes.toBytes(endKey));
    }
    if (sb.length() > 0) {
        for (String columnName : sb.toString().trim().split(" ")) {
            String family = StringUtils.substringBefore(columnName, ":");
            String qualifier = StringUtils.substringAfter(columnName, ":");
            if (StringUtils.isBlank(qualifier)) {
                scan.addFamily(Bytes.toBytes(family));
            } else {
                scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
            }
        }
    }
    scan.setFilter(new FirstKeyOnlyFilter());
    scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    return job;
}
From source file:com.sirius.hadoop.job.onlinetime.OnlineTimeJob.java
License:Apache License
public Job build() throws Exception {
    // init
    Job job = Job.getInstance(getConf(), "onlinetime");
    job.setJarByClass(OnlineTimeJob.class);

    // map
    job.setMapperClass(StatusMapper.class);
    job.setMapOutputKeyClass(StatusKey.class);
    job.setMapOutputValueClass(OnlineRecord.class);

    // custom partition
    job.setPartitionerClass(StatusKeyPartitioner.class);

    // reduce
    job.setGroupingComparatorClass(StatusKeyGroupComparator.class);
    job.setReducerClass(StatusReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // input
    FileInputFormat.setInputPaths(job, new Path("/subscriber_status/subscriber_status.json"));

    // output
    FileOutputFormat.setOutputPath(job, out);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, Lz4Codec.class);

    return job;
}
From source file:com.streamsets.pipeline.emr.EmrBinding.java
License:Apache License
@Override
public void init() throws Exception {
    Configuration conf = new Configuration();
    LOG.info("Arg 0: {}, Arg 1: {}, Arg 2: {}, Arg 3: {}, Arg 4: {}", args[0], args[1], args[2], args[3], args[4]);
    try (InputStream in = getClass().getClassLoader().getResourceAsStream("cluster_sdc.properties")) {
        properties = new Properties();
        properties.load(in);
        String dataFormat = Utils.getHdfsDataFormat(properties);
        for (Object key : properties.keySet()) {
            String realKey = String.valueOf(key);
            // TODO - Override other configs set in HdfsSource
            if (overriddenConfs.contains(realKey)) {
                String value = Utils.getPropertyNotNull(properties, realKey);
                conf.set(realKey, value);
            }
        }
        String javaOpts = args[3];
        Integer mapMemoryMb = HadoopMapReduceBinding.getMapMemoryMb(javaOpts, conf);
        if (mapMemoryMb != null) {
            conf.set(HadoopMapReduceBinding.MAPREDUCE_MAP_MEMORY_MB, String.valueOf(mapMemoryMb));
        }
        conf.set(HadoopMapReduceBinding.MAPREDUCE_JAVA_OPTS, javaOpts);
        conf.setBoolean("mapreduce.map.speculative", false);
        conf.setBoolean("mapreduce.reduce.speculative", false);
        if ("AVRO".equalsIgnoreCase(dataFormat)) {
            conf.set(Job.INPUT_FORMAT_CLASS_ATTR, "org.apache.avro.mapreduce.AvroKeyInputFormat");
            conf.set(Job.MAP_OUTPUT_KEY_CLASS, "org.apache.avro.mapred.AvroKey");
        }
        conf.set(MRJobConfig.MAP_LOG_LEVEL, args[4]);
        job = Job.getInstance(conf, "StreamSets Data Collector: "
                + properties.getProperty(ClusterModeConstants.CLUSTER_PIPELINE_TITLE) + "::" + args[2]);
        for (String archive : Arrays.asList(args[0].split("\\s*,\\s*"))) {
            job.addCacheArchive(new URI(archive));
        }
        for (String libJar : Arrays.asList(args[1].split("\\s*,\\s*"))) {
            job.addFileToClassPath(new Path(libJar));
        }
        job.setJarByClass(this.getClass());
        job.setNumReduceTasks(0);
        if (!"AVRO".equalsIgnoreCase(dataFormat)) {
            job.setOutputKeyClass(NullWritable.class);
        }
        job.setMapperClass(PipelineMapper.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(NullOutputFormat.class);
    }
}
From source file:com.streamsets.pipeline.hadoop.HadoopMapReduceBinding.java
License:Apache License
@Override
public void init() throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = parser.getRemainingArgs();
    properties = new Properties();
    if (remainingArgs.length != 2) {
        List<String> argsList = new ArrayList<>();
        for (String arg : remainingArgs) {
            argsList.add("'" + arg + "'");
        }
        throw new IllegalArgumentException("Error expected properties-file java-opts got: " + argsList);
    }
    String propertiesFile = remainingArgs[0];
    String javaOpts = remainingArgs[1];
    try (InputStream in = new FileInputStream(propertiesFile)) {
        properties.load(in);
        String dataFormat = getProperty("dataFormat");
        String source = this.getClass().getSimpleName();
        for (Object key : properties.keySet()) {
            String realKey = String.valueOf(key);
            String value = getProperty(realKey);
            conf.set(realKey, value, source);
        }
        conf.set("mapred.child.java.opts", javaOpts);
        conf.setBoolean("mapreduce.map.speculative", false);
        conf.setBoolean("mapreduce.reduce.speculative", false);
        if (dataFormat.equalsIgnoreCase("AVRO")) {
            conf.set(Job.INPUT_FORMAT_CLASS_ATTR, "org.apache.avro.mapreduce.AvroKeyInputFormat");
            conf.set(Job.MAP_OUTPUT_KEY_CLASS, "org.apache.avro.mapred.AvroKey");
        }
        job = Job.getInstance(conf, "StreamSets Data Collector - Batch Execution Mode");
        job.setJarByClass(this.getClass());
        job.setNumReduceTasks(0);
        if (!dataFormat.equalsIgnoreCase("AVRO")) {
            job.setOutputKeyClass(NullWritable.class);
        }
        job.setMapperClass(PipelineMapper.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(NullOutputFormat.class);
    }
}
From source file:com.studium.millionsong.mapreduce.CompleteToStripped.java
public static void main(String[] args) throws Exception {
    Path inputPath = new Path("/millionSong/complete.csv");
    Path outputPath = new Path("/millionSong/result/run1");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "From complete to stripped dataset");

    // Job configuration:
    // 0. Set the jar which contains these classes
    job.setJarByClass(CompleteToStripped.class);

    // 1. Which Mapper and Reducer should be used
    job.setMapperClass(CompleteStrippedMapper.class);
    job.setReducerClass(StrippedReducer.class);

    // 2. Which are the output datatypes of the mapper and reducer functions
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // 3. Set local combiner for data reduction
    job.setCombinerClass(StrippedReducer.class);

    // 4. Where are the input file(s)
    // The default FileInputFormat is TextInputFormat, so it uses
    // the correct implementation automatically.
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.telefonica.iot.tidoop.apiext.hadoop.ckan.CKANInputFormatTest.java
License:Open Source License
/**
 * Sets up tests by creating a unique instance of the tested class, and by defining the behaviour of the
 * mocked classes.
 *
 * @throws Exception
 */
@Before
public void setUp() throws Exception {
    // set up the instance of the tested class
    ckanInputFormat = new CKANInputFormat();

    // set up the other instances
    conf = new Configuration();
    job = Job.getInstance(conf, "testGetSplitsResource");

    // set up the behaviour of the mocked classes
    when(backend.getNumRecords(resId)).thenReturn(numRecords);
}
From source file:com.telefonica.iot.tidoop.apiext.hadoop.ckan.CKANOutputFormatTest.java
License:Open Source License
/**
 * Sets up tests by creating a unique instance of the tested class, and by defining the behaviour of the
 * mocked classes.
 *
 * @throws Exception
 */
@Before
public void setUp() throws Exception {
    // set up the instance of the tested class
    ckanOutputFormat = new CKANOutputFormat();

    // set up the other instances
    conf = new Configuration();
    job = Job.getInstance(conf, "CKANOutputFormatTest");

    // set up the behaviour of the mocked classes
    when(backend.getNumRecords(resId)).thenReturn(numRecords);
}
From source file:com.telefonica.iot.tidoop.apiext.utils.CKANMapReduceExample.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 7) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String ckanHost = args[0];
    String ckanPort = args[1];
    boolean sslEnabled = args[2].equals("true");
    String ckanAPIKey = args[3];
    String ckanInputs = args[4];
    String ckanOutput = args[5];
    String splitsLength = args[6];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "CKAN MapReduce test");
    job.setJarByClass(CKANMapReduceExample.class);
    job.setMapperClass(RecordSizeGetter.class);
    job.setCombinerClass(RecordSizeAdder.class);
    job.setReducerClass(RecordSizeAdder.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(CKANInputFormat.class);
    CKANInputFormat.setInput(job, ckanInputs);
    CKANInputFormat.setEnvironment(job, ckanHost, ckanPort, sslEnabled, ckanAPIKey);
    CKANInputFormat.setSplitsLength(job, splitsLength);
    job.setOutputFormatClass(CKANOutputFormat.class);
    CKANOutputFormat.setEnvironment(job, ckanHost, ckanPort, sslEnabled, ckanAPIKey);
    CKANOutputFormat.setOutputPkg(job, ckanOutput);

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.telefonica.iot.tidoop.mrlib.jobs.Filter.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String regex = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_REGEX, regex);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-filter");
    job.setNumReduceTasks(1);
    job.setJarByClass(Filter.class);
    job.setMapperClass(LineFilter.class);
    job.setCombinerClass(LinesCombiner.class);
    job.setReducerClass(LinesJoiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String mapFunction = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_FUNCTION, mapFunction);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-maponly");
    job.setNumReduceTasks(0);
    job.setJarByClass(MapOnly.class);
    job.setMapperClass(CustomMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}