Example usage for org.apache.hadoop.conf Configuration setInt

Introduction

In this page you can find the example usage for org.apache.hadoop.conf Configuration setInt.

Prototype

public void setInt(String name, int value)

Source Link

Document

Set the value of the name property to an int.

Usage

From source file:clustering.inverted_index.Driver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir" + "[decimal_number] [pruning_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);//  w w w. j a v  a  2  s.  c om
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector lenth of each document */

    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);

    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */

    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);

    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);

    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file:clustering.mst.Driver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);/*  www . j av  a 2s.  co  m*/
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");

    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */

    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);

    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }

    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);

    childJob.setPartitionerClass(ChildPartitioner.class);

    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate final mst */

    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);

    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);

    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);

    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs

    MapReduceUtils.runJobs(jobControl);

    return finalJob.waitForCompletion(true) ? 0 : 1;
}

From source file:clustering.simhash.Driver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);/*  w  w  w.j a v  a  2  s .co  m*/
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);

    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);

    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file:clustering.similarity.ISimDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simpre_dir output_dir " + "[compression_or_not] [reduce_task_number]\n",
                getClass().getSimpleName());
        System.exit(1);//from w w w .  j a  v  a 2 s.co  m
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    Job job = Job.getInstance(conf, "isim job");
    job.setJarByClass(ISimDriver.class);

    if (args.length > 2 && args[2].equals("0")) {
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(args[0]));

        conf.setBoolean("mapreduce.map.output.compress", true);
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.GzipCodec");

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    if (args.length > 3) {
        conf.setInt("reduce.num", Integer.valueOf(args[3]));
    } else {
        conf.setInt("reduce.num", 5);
    }

    job.setMapperClass(ISimMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setCombinerClass(ISimCombiner.class);
    job.setPartitionerClass(HashPartitioner.class);

    job.setNumReduceTasks(conf.getInt("reduce.num", 1));

    job.setReducerClass(ISimReducer.class);
    job.setOutputKeyClass(IntIntTupleWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("inverted similarity job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file:clustering.similarity.PreDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf(/*from w  w w  . j  av  a2  s .  c o  m*/
                "usage: %s inverted_index_result_dir output_dir"
                        + " [compress_or_not] [reducer_number] [deci_number]\n",
                this.getClass().getSimpleName());
        System.exit(1);
    }
    Configuration conf = getConf();

    conf = MapReduceUtils.initConf(conf);
    conf.set("mapreduce.reduce.speculative", "false");

    // TODO: 17-4-24 calculate split number from reducer number
    conf.setInt("split.num", 8);

    if (args.length > 3) {
        conf.setInt("reducer.num", Integer.valueOf(args[3]));
    } else {
        conf.setInt("reducer.num", 29);
    }
    if (args.length > 4) {
        conf.setInt("deci.number", Integer.valueOf(args[4]));
    } else {
        conf.setInt("deci.number", 3);
    }

    Job job = Job.getInstance(conf, "pre job");
    job.setJarByClass(PreDriver.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapperClass(PreMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setPartitionerClass(PrePartitioner.class);

    job.setNumReduceTasks(conf.getInt("reducer.num", 29));
    job.setReducerClass(PreReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // set default compression
    if (args.length > 2 && args[2].equals("0")) {
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("inverted similarity pre job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file:cn.easyhbase.client.hbase.HBaseAsyncOperationFactory.java

License:Apache License

public static HBaseAsyncOperation create(Configuration configuration) throws IOException {
    boolean enableAsyncMethod = configuration.getBoolean(ENABLE_ASYNC_METHOD, DEFAULT_ENABLE_ASYNC_METHOD);
    LOGGER.info("hbase.client.async.enable: " + enableAsyncMethod);
    if (!enableAsyncMethod) {
        return DisabledHBaseAsyncOperation.INSTANCE;
    }/*ww w.  j ava  2 s .c  o m*/

    int queueSize = configuration.getInt(ASYNC_IN_QUEUE_SIZE, DEFAULT_ASYNC_IN_QUEUE_SIZE);

    if (configuration.get(ASYNC_PERIODIC_FLUSH_TIME, null) == null) {
        configuration.setInt(ASYNC_PERIODIC_FLUSH_TIME, DEFAULT_ASYNC_PERIODIC_FLUSH_TIME);
    }

    if (configuration.get(ASYNC_RETRY_COUNT, null) == null) {
        configuration.setInt(ASYNC_RETRY_COUNT, DEFAULT_ASYNC_RETRY_COUNT);
    }

    return new HBaseAsyncTemplate(configuration, queueSize);
}

From source file:cn.easyhbase.client.hbase.HBaseAsyncOperationFactory.java

License:Apache License

public static HBaseAsyncOperation create(Connection connection, Configuration configuration)
        throws IOException {
    boolean enableAsyncMethod = configuration.getBoolean(ENABLE_ASYNC_METHOD, DEFAULT_ENABLE_ASYNC_METHOD);
    if (!enableAsyncMethod) {
        return DisabledHBaseAsyncOperation.INSTANCE;
    }/*from  ww  w  .  j  ava2  s  .c o  m*/

    int queueSize = configuration.getInt(ASYNC_IN_QUEUE_SIZE, DEFAULT_ASYNC_IN_QUEUE_SIZE);

    if (configuration.get(ASYNC_PERIODIC_FLUSH_TIME, null) == null) {
        configuration.setInt(ASYNC_PERIODIC_FLUSH_TIME, DEFAULT_ASYNC_PERIODIC_FLUSH_TIME);
    }

    if (configuration.get(ASYNC_RETRY_COUNT, null) == null) {
        configuration.setInt(ASYNC_RETRY_COUNT, DEFAULT_ASYNC_RETRY_COUNT);
    }

    return new HBaseAsyncTemplate(connection, configuration, queueSize);
}

From source file:co.cask.cdap.hbase.wd.RowKeyDistributorTestBase.java

License:Apache License

@BeforeClass
public static void beforeClass() throws Exception {
    if (!runBefore) {
        return;/*from   www  .ja  va  2s  .co  m*/
    }

    testingUtility = new HBaseTestingUtility();
    Configuration hConf = testingUtility.getConfiguration();
    hConf.set("yarn.is.minicluster", "true");
    // Tune down the connection thread pool size
    hConf.setInt("hbase.hconnection.threads.core", 5);
    hConf.setInt("hbase.hconnection.threads.max", 10);
    // Tunn down handler threads in regionserver
    hConf.setInt("hbase.regionserver.handler.count", 10);

    // Set to random port
    hConf.setInt("hbase.master.port", Networks.getRandomPort());
    hConf.setInt("hbase.master.info.port", Networks.getRandomPort());
    hConf.setInt("hbase.regionserver.port", Networks.getRandomPort());
    hConf.setInt("hbase.regionserver.info.port", Networks.getRandomPort());

    testingUtility.startMiniCluster();
    hTable = testingUtility.createTable(TABLE, CF);
}

From source file:co.cask.cdap.hbase.wd.RowKeyDistributorTestBase.java

License:Apache License

private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = new Configuration(testingUtility.getConfiguration());
    conf.set("fs.defaultFS", "file:///");
    conf.set("fs.default.name", "file:///");
    conf.setInt("mapreduce.local.map.tasks.maximum", 16);
    conf.setInt("mapreduce.local.reduce.tasks.maximum", 16);
    Job job = Job.getInstance(conf, "testMapReduceInternal()-Job");
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat which was set in TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);// w  w  w  . j av  a2s.  c o m

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);

    // Need to kill the job after completion, after it could leave MRAppMaster running not terminated.
    // Not sure what causing this, but maybe problem in MiniYarnCluster
    job.killJob();
}

From source file:co.cask.cdap.internal.app.runtime.batch.inmemory.LocalClientProtocolProvider.java

License:Apache License

@Override
public ClientProtocol create(Configuration conf) throws IOException {
    String framework = conf.get(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
    LOG.info("Using framework: " + framework);
    if (!MRConfig.LOCAL_FRAMEWORK_NAME.equals(framework)) {
        return null;
    }/*from   w  ww  .j  a v a  2 s  .  c  om*/

    // We have to use something unique like "clocal" to make sure Hadoop's LocalClientProtocolProvider will fail to
    // provide the ClientProtocol
    String tracker = conf.get(JTConfig.JT_IPC_ADDRESS, "clocal");
    LOG.info("Using tracker: " + tracker);

    if ("clocal".equals(tracker)) {
        conf.setInt("mapreduce.job.maps", 1);
        return new LocalJobRunnerWithFix(conf);
    } else {

        throw new IOException("Invalid \"" + JTConfig.JT_IPC_ADDRESS
                + "\" configuration value for LocalJobRunner : \"" + tracker + "\"");
    }
}