Example usage for org.apache.hadoop.conf Configuration setLong

List of usage examples for org.apache.hadoop.conf Configuration setLong

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.setLong.

Prototype

public void setLong(String name, long value) 

Source Link

Document

Set the value of the `name` property to a `long`.

Usage

From source file:nl.utwente.mirex.AnchorExtract.java

License:Open Source License

/**
 * Runs the MapReduce job "anchor text extraction"
 * @param args 0: path to web collection on HDFS; 1: (non-existing) path that will contain anchor texts
 * @usage. /*w ww  .j  a  va2s . co m*/
 * <code> hadoop jar mirex-0.2.jar nl.utwente.mirex.AnchorExtract /user/hadoop/ClueWeb09_English/&#x2a;/ /user/hadoop/ClueWeb09_Anchors </code> 
 */
public static void main(String[] args) throws Exception {
    // Set job configuration
    Configuration conf = new Configuration();
    conf.setLong("mapred.task.timeout", 1800 * 1000L); // 30 minutes timeout
    Job job = new Job(conf, "AnchorExtract");
    job.setJarByClass(AnchorExtract.class);

    if (args.length != 2) {
        System.out.printf("Usage: %s inputFiles outputFile\n", AnchorExtract.class.getSimpleName());
        System.out.println("          inputFiles: path to data");
        System.out.println("          outputFile: directory where anchor text is stored");
        System.exit(1);
    }
    int argc = 0;
    String inputFiles = args[argc++];
    String outputFile = args[argc++];

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setCombinerClass(Combine.class);

    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(WarcFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputFiles)); // '(conf, args[0])' to accept comma-separated list.
    FileOutputFormat.setOutputPath(job, new Path(outputFile));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    job.waitForCompletion(true);
}

From source file:org.apache.accumulo.examples.mapreduce.TeraSortIngest.java

License:Apache License

/**
 * Builds and runs the "TeraSortCloud" job: a job with zero reduce tasks that
 * reads from {@code RangeInputFormat}, maps with {@code SortGenMapper} and
 * writes mutations through {@code AccumuloOutputFormat}.
 *
 * @param args command-line arguments, parsed into an {@code Opts} instance
 * @return 0 if the job reports success, 1 otherwise
 * @throws Exception propagated from option parsing, job setup or job execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job ingestJob = Job.getInstance(getConf());
    ingestJob.setJobName("TeraSortCloud");
    ingestJob.setJarByClass(getClass());

    final Opts options = new Opts();
    options.parseArgs(TeraSortIngest.class.getName(), args);

    // Input side: generated ranges feed the sort-data mapper.
    ingestJob.setInputFormatClass(RangeInputFormat.class);
    ingestJob.setMapperClass(SortGenMapper.class);
    ingestJob.setMapOutputKeyClass(Text.class);
    ingestJob.setMapOutputValueClass(Mutation.class);

    // No reduce phase.
    ingestJob.setNumReduceTasks(0);

    // Output side: mutations go to Accumulo through a batch writer.
    ingestJob.setOutputFormatClass(AccumuloOutputFormat.class);
    options.setAccumuloConfigs(ingestJob);
    AccumuloOutputFormat.setBatchWriterOptions(ingestJob,
            new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000));

    // Hand the generator parameters to the tasks through the job configuration.
    final Configuration settings = ingestJob.getConfiguration();
    settings.setLong(NUMROWS, options.numRows);
    settings.setInt("cloudgen.minkeylength", options.minKeyLength);
    settings.setInt("cloudgen.maxkeylength", options.maxKeyLength);
    settings.setInt("cloudgen.minvaluelength", options.minValueLength);
    settings.setInt("cloudgen.maxvaluelength", options.maxValueLength);
    settings.set("cloudgen.tablename", options.getTableName());

    // A split count of 0 leaves NUMSPLITS unset.
    if (options.splits != 0) {
        settings.setInt(NUMSPLITS, options.splits);
    }

    ingestJob.waitForCompletion(true);
    if (ingestJob.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.examples.simple.mapreduce.TeraSortIngest.java

License:Apache License

/**
 * Configures and runs the "TeraSortCloud" ingest job. The job has zero
 * reduce tasks, reads from {@code RangeInputFormat}, maps with
 * {@code SortGenMapper} and writes through {@code AccumuloOutputFormat}.
 *
 * @param args command-line arguments, parsed into an {@code Opts} instance
 * @return 0 if the job reports success, 1 otherwise
 * @throws Exception propagated from option parsing, job setup or job execution
 */
@Override
public int run(String[] args) throws Exception {
    Job job = JobUtil.getJob(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());
    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    // No reduce phase.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    // Generator parameters are passed to the tasks through the job configuration.
    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    // Decide from the parsed option, not from the raw argument count: the
    // number of arguments does not say whether a split count was supplied.
    // NOTE(review): assumes Opts.splits is 0 when the option is absent - confirm.
    if (opts.splits != 0) {
        conf.setInt(NUMSPLITS, opts.splits);
    }

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}

From source file:org.apache.accumulo.examples.wikisearch.output.SortingRFileOutputFormat.java

License:Apache License

/**
 * Stores {@code maxBufferSize} in {@code conf} under the {@code MAX_BUFFER_SIZE} key.
 *
 * @param conf configuration to update
 * @param maxBufferSize value to store; NOTE(review): presumably a size in bytes - confirm
 *        against the code that reads this key
 */
public static void setMaxBufferSize(Configuration conf, long maxBufferSize) {
    conf.setLong(MAX_BUFFER_SIZE, maxBufferSize);
}

From source file:org.apache.accumulo.server.test.continuous.ContinuousMoru.java

License:Apache License

/**
 * Runs a job with zero reduce tasks that reads the given table through
 * {@code AccumuloInputFormat}, maps it with {@code CMapper} and writes back to
 * the same table through {@code AccumuloOutputFormat}.
 *
 * @param args exactly 13 positional arguments, in the order listed in the usage message
 * @return 0 if the job reports success, 1 otherwise
 * @throws IllegalArgumentException if the number of arguments is not 13
 * @throws IOException if computing the input ranges fails (the cause is wrapped)
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 13) {
        final String usage = "Usage : " + ContinuousMoru.class.getName()
                + " <instance name> <zookeepers> <user> <pass> <table> <min> <max> <max cf> <max cq> <max mem> <max latency> <num threads> <max maps>";
        throw new IllegalArgumentException(usage);
    }

    // Positional arguments, in the order given by the usage message.
    final String instanceName = args[0];
    final String zkHosts = args[1];
    final String userName = args[2];
    final String password = args[3];
    final String tableName = args[4];
    final String minArg = args[5];
    final String maxArg = args[6];
    final String maxCfArg = args[7];
    final String maxCqArg = args[8];
    final String maxMemArg = args[9];
    final String maxLatencyArg = args[10];
    final String numThreadsArg = args[11];
    final String maxMapsArg = args[12];

    final String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();
    final Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());
    final Configuration conf = job.getConfiguration();

    // Input: the whole table, read with empty authorizations.
    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputInfo(conf, userName, password.getBytes(), tableName, new Authorizations());
    AccumuloInputFormat.setZooKeeperInstance(conf, instanceName, zkHosts);

    // Split the full range along tablet boundaries, bounded by the <max maps>
    // argument, and turn off automatic range adjustment.
    try {
        AccumuloInputFormat.setRanges(conf,
                new ZooKeeperInstance(instanceName, zkHosts)
                        .getConnector(userName, password.getBytes())
                        .tableOperations()
                        .splitRangeByTablets(tableName, new Range(), Integer.parseInt(maxMapsArg)));
        AccumuloInputFormat.disableAutoAdjustRanges(conf);
    } catch (Exception e) {
        throw new IOException(e);
    }

    job.setMapperClass(CMapper.class);

    // No reduce phase.
    job.setNumReduceTasks(0);

    // Output: back into the same table.
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setOutputInfo(conf, userName, password.getBytes(), false, tableName);
    AccumuloOutputFormat.setZooKeeperInstance(conf, instanceName, zkHosts);
    // The latency argument is divided by 1000 and truncated to an int.
    // NOTE(review): presumably a milliseconds-to-seconds conversion - confirm.
    AccumuloOutputFormat.setMaxLatency(conf, (int) (Integer.parseInt(maxLatencyArg) / 1000.0));
    AccumuloOutputFormat.setMaxMutationBufferSize(conf, Long.parseLong(maxMemArg));
    AccumuloOutputFormat.setMaxWriteThreads(conf, Integer.parseInt(numThreadsArg));

    // Parameters read by the tasks, plus a fresh random id for this run.
    conf.setLong(MIN, Long.parseLong(minArg));
    conf.setLong(MAX, Long.parseLong(maxArg));
    conf.setInt(MAX_CF, Integer.parseInt(maxCfArg));
    conf.setInt(MAX_CQ, Integer.parseInt(maxCqArg));
    conf.set(CI_ID, UUID.randomUUID().toString());

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.test.continuous.ContinuousMoru.java

License:Apache License

/**
 * Runs a job with zero reduce tasks that reads the client's table through
 * {@code AccumuloInputFormat}, maps it with {@code CMapper} and writes through
 * {@code AccumuloOutputFormat}. Tracing is stopped once the job has finished.
 *
 * @param args command-line arguments, parsed into the client, batch-writer and job options
 * @return 0 if the job reports success, 1 otherwise
 * @throws IOException if computing the input ranges fails (the cause is wrapped)
 */
@Override
public int run(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException {
    final Opts moruOpts = new Opts();
    final BatchWriterOpts writerOpts = new BatchWriterOpts();
    final MapReduceClientOnDefaultTable client = new MapReduceClientOnDefaultTable("ci");
    client.parseArgs(ContinuousMoru.class.getName(), args, writerOpts, moruOpts);

    final String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();
    final Job job = Job.getInstance(getConf(), jobName);
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(AccumuloInputFormat.class);
    client.setAccumuloConfigs(job);

    // Split the full range along tablet boundaries, bounded by maxMaps, and
    // turn off automatic range adjustment.
    try {
        AccumuloInputFormat.setRanges(job, client.getConnector().tableOperations()
                .splitRangeByTablets(client.getTableName(), new Range(), moruOpts.maxMaps));
        AccumuloInputFormat.setAutoAdjustRanges(job, false);
    } catch (Exception e) {
        throw new IOException(e);
    }

    job.setMapperClass(CMapper.class);

    // No reduce phase.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setBatchWriterOptions(job, writerOpts.getBatchWriterConfig());

    // Parameters read by the tasks, plus a fresh random id for this run.
    final Configuration jobConf = job.getConfiguration();
    jobConf.setLong(MIN, moruOpts.min);
    jobConf.setLong(MAX, moruOpts.max);
    jobConf.setInt(MAX_CF, moruOpts.maxColF);
    jobConf.setInt(MAX_CQ, moruOpts.maxColQ);
    jobConf.set(CI_ID, UUID.randomUUID().toString());

    job.waitForCompletion(true);
    client.stopTracing();
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.test.mapreduce.TeraSortIngest.java

License:Apache License

/**
 * Configures and runs the "TeraSortCloud" ingest job. The job has zero
 * reduce tasks, reads from {@code RangeInputFormat}, maps with
 * {@code SortGenMapper} and writes through {@code AccumuloOutputFormat}.
 *
 * @param args command-line arguments, parsed into an {@code Opts} instance
 * @return 0 if the job reports success, 1 otherwise
 * @throws Exception propagated from option parsing, job setup or job execution
 */
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());
    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    // No reduce phase.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    // Generator parameters are passed to the tasks through the job configuration.
    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    // Decide from the parsed option, not from the raw argument count: the
    // number of arguments does not say whether a split count was supplied.
    // NOTE(review): assumes Opts.splits is 0 when the option is absent - confirm.
    if (opts.splits != 0) {
        conf.setInt(NUMSPLITS, opts.splits);
    }

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}

From source file:org.apache.accumulo.test.mrit.IntegrationTestMapReduce.java

License:Apache License

/**
 * Runs the "accumulo integration test runner" job: each input line becomes
 * its own split, {@code TestMapper} processes it and {@code TestReducer}
 * groups the output.
 *
 * @param args two arguments: the input path and the output path
 * @return 0 if the job completes successfully, 1 on failure or on a wrong argument count
 * @throws Exception propagated from job setup or job execution
 */
@Override
public int run(String[] args) throws Exception {
    // read a list of tests from the input, and print out the results
    if (args.length != 2) {
        System.err.println("Wrong number of args: <input> <output>");
        return 1;
    }

    final Job job = Job.getInstance(getConf(), "accumulo integration test runner");
    // All settings below go to the job's own configuration.
    final Configuration jobConf = job.getConfiguration();

    // some tests take more than 10 minutes
    jobConf.setLong(MRJobConfig.TASK_TIMEOUT, 20 * 60 * 1000);

    // minicluster uses a lot of ram
    jobConf.setInt(MRJobConfig.MAP_MEMORY_MB, 4000);

    // hadoop puts an ancient version of jline on the classpath
    jobConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    // no need to run a test multiple times
    job.setSpeculativeExecution(false);

    // read one line at a time
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.setNumLinesPerSplit(job, 1);

    // run the test
    job.setJarByClass(IntegrationTestMapReduce.class);
    job.setMapperClass(TestMapper.class);

    // group test by result code
    job.setReducerClass(TestReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.testing.core.continuous.ContinuousMoru.java

License:Apache License

/**
 * Runs a job with zero reduce tasks that reads the configured table through
 * {@code AccumuloInputFormat}, maps it with {@code CMapper} and writes back
 * through {@code AccumuloOutputFormat}. All settings come from the
 * properties file named by the first argument.
 *
 * @param args {@code args[0]} is the path of the test properties file; extra arguments are ignored
 * @return 0 if the job reports success, 1 otherwise
 * @throws IllegalArgumentException if no properties file argument is given
 * @throws IOException if computing the input ranges fails (the cause is wrapped)
 */
@Override
public int run(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException {

    // Fail with a usage message instead of a bare ArrayIndexOutOfBoundsException.
    if (args.length < 1) {
        throw new IllegalArgumentException("Usage : " + this.getClass().getName() + " <test properties file>");
    }

    Properties props = TestProps.loadFromFile(args[0]);
    ContinuousEnv env = new ContinuousEnv(props);

    Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(AccumuloInputFormat.class);

    AccumuloInputFormat.setConnectorInfo(job, env.getAccumuloUserName(), env.getToken());
    AccumuloInputFormat.setInputTableName(job, env.getAccumuloTableName());
    AccumuloInputFormat.setZooKeeperInstance(job, env.getClientConfiguration());

    int maxMaps = Integer.parseInt(props.getProperty(TestProps.CI_VERIFY_MAX_MAPS));

    // Split the full range along tablet boundaries, bounded by maxMaps, and
    // turn off automatic range adjustment.
    try {
        Set<Range> ranges = env.getAccumuloConnector().tableOperations()
                .splitRangeByTablets(env.getAccumuloTableName(), new Range(), maxMaps);
        AccumuloInputFormat.setRanges(job, ranges);
        AccumuloInputFormat.setAutoAdjustRanges(job, false);
    } catch (Exception e) {
        throw new IOException(e);
    }

    job.setMapperClass(CMapper.class);
    // No reduce phase.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setBatchWriterOptions(job, env.getBatchWriterConfig());
    AccumuloOutputFormat.setConnectorInfo(job, env.getAccumuloUserName(), env.getToken());
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, env.getAccumuloTableName());
    AccumuloOutputFormat.setZooKeeperInstance(job, env.getClientConfiguration());

    // Parameters read by the tasks, plus a fresh random id for this run.
    Configuration conf = job.getConfiguration();
    conf.setLong(MIN, env.getRowMin());
    conf.setLong(MAX, env.getRowMax());
    conf.setInt(MAX_CF, env.getMaxColF());
    conf.setInt(MAX_CQ, env.getMaxColQ());
    conf.set(CI_ID, UUID.randomUUID().toString());

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}

From source file:org.apache.apex.engine.YarnAppLauncherImpl.java

License:Apache License

/**
 * Writes {@code value} into {@code conf} under {@code property}, using the
 * typed setter that matches the value's runtime type. Integer, Boolean, Long,
 * Float and Double each use their own setter; any other type is stored as its
 * {@code toString()} form.
 *
 * @param conf configuration to update
 * @param property name of the property to set
 * @param value value to store; must not be null, because the fallback calls {@code value.toString()}
 */
private void setConfiguration(Configuration conf, String property, Object value) {
    if (value instanceof Integer) {
        conf.setInt(property, (Integer) value);
        return;
    }
    if (value instanceof Boolean) {
        conf.setBoolean(property, (Boolean) value);
        return;
    }
    if (value instanceof Long) {
        conf.setLong(property, (Long) value);
        return;
    }
    if (value instanceof Float) {
        conf.setFloat(property, (Float) value);
        return;
    }
    if (value instanceof Double) {
        conf.setDouble(property, (Double) value);
        return;
    }
    // No typed setter applies: store the string form.
    conf.set(property, value.toString());
}