Example usage for org.apache.hadoop.conf Configuration setInt

List of usage examples for org.apache.hadoop.conf Configuration setInt

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration setInt, drawn from open-source projects.

Prototype

public void setInt(String name, int value) 

Document

Set the value of the name property to an int.
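
For orientation, here is a minimal, self-contained sketch of setting an int property and reading it back; the property name used is a made-up example, not a standard Hadoop key.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store an int value under a property name (hypothetical key).
        conf.setInt("example.buffer.size", 64 * 1024);

        // Read it back; the second argument is the default returned if the key is absent.
        int bufferSize = conf.getInt("example.buffer.size", 4 * 1024);
        System.out.println("buffer size = " + bufferSize); // prints 65536
    }
}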

Usage

From source file:com.yahoo.glimmer.indexing.generator.TripleIndexGenerator.java

License:Open Source License

public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(TripleIndexGenerator.class.getName(),
            "Generates a keyword index from RDF data.",
            new Parameter[] {
                    new Switch(NO_CONTEXTS_ARG, 'C', "withoutContexts",
                            "Don't process the contexts for each tuple."),
                    new FlaggedOption(METHOD_ARG, JSAP.STRING_PARSER, "horizontal", JSAP.REQUIRED, 'm',
                            METHOD_ARG, "horizontal or vertical."),
                    new FlaggedOption(PREDICATES_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
                            'p', PREDICATES_ARG, "Subset of the properties to be indexed."),
                    new FlaggedOption(RESOURCE_PREFIX_ARG, JSAP.STRING_PARSER, "@", JSAP.NOT_REQUIRED, 'r',
                            RESOURCE_PREFIX_ARG,
                            "Prefix to add to object resource hash values when indexing. Stops queries for numbers matching resource hash values. Default is '@'"),

                    new UnflaggedOption("input", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the input data."),
                    new UnflaggedOption(NUMBER_OF_DOCS_ARG, JSAP.LONG_PARSER, JSAP.REQUIRED,
                            "Number of documents to index"),
                    new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the output."),
                    new UnflaggedOption(RESOURCES_HASH_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location of the resources hash file."), });

    JSAPResult jsapResult = jsap.parse(args);

    // check whether the command line was valid, and if it wasn't,
    // display usage information and exit.
    if (!jsapResult.success()) {
        System.err.println();
        System.err.println("Usage: java " + TripleIndexGenerator.class.getName());
        System.err.println("                " + jsap.getUsage());
        System.err.println();
        System.exit(1);
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(TripleIndexGenerator.class);
    job.setJobName("TripleIndexGenerator" + System.currentTimeMillis());

    FileInputFormat.setInputPaths(job, new Path(jsapResult.getString("input")));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(DocumentMapper.class);
    job.setMapOutputKeyClass(TermKey.class);
    job.setMapOutputValueClass(TermValue.class);

    job.setPartitionerClass(TermKey.FirstPartitioner.class);
    job.setGroupingComparatorClass(TermKey.FirstGroupingComparator.class);

    job.setReducerClass(TermReduce.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IndexRecordWriterValue.class);
    job.setOutputFormatClass(IndexRecordWriter.OutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(jsapResult.getString("output")));

    Configuration conf = job.getConfiguration();

    conf.setClass("mapred.output.key.comparator.class", TermKey.Comparator.class, WritableComparator.class);
    conf.set("mapreduce.user.classpath.first", "true");

    long numDocs = jsapResult.getLong(NUMBER_OF_DOCS_ARG);
    conf.setLong(NUMBER_OF_DOCUMENTS, numDocs);
    // Set this in an attempt to get around the 2GB RAM task limit on our cluster.
    // Setting this in the hope of fixing direct buffer memory errors.
    conf.setInt(INDEX_WRITER_CACHE_SIZE, 1024 * 1024);

    conf.set(OUTPUT_DIR, jsapResult.getString("output"));

    boolean withContexts = !jsapResult.getBoolean(NO_CONTEXTS_ARG, false);
    if (jsapResult.getString(METHOD_ARG).equalsIgnoreCase(METHOD_ARG_VALUE_HORIZONTAL)) {
        HorizontalDocumentFactory.setupConf(conf, withContexts, jsapResult.getString(RESOURCES_HASH_ARG),
                jsapResult.getString(RESOURCE_PREFIX_ARG));
    } else if (jsapResult.getString(METHOD_ARG).equalsIgnoreCase(METHOD_ARG_VALUE_VERTICAL)) {
        if (!jsapResult.contains(PREDICATES_ARG)) {
            throw new IllegalArgumentException("When '" + METHOD_ARG + "' is '" + METHOD_ARG_VALUE_VERTICAL
                    + "' you have to give a predicates file too.");
        }
        VerticalDocumentFactory.setupConf(conf, withContexts, jsapResult.getString(RESOURCES_HASH_ARG),
                jsapResult.getString(RESOURCE_PREFIX_ARG), jsapResult.getString(PREDICATES_ARG));
    } else {
        throw new IllegalArgumentException(METHOD_ARG + " should be '" + METHOD_ARG_VALUE_HORIZONTAL + "' or '"
                + METHOD_ARG_VALUE_VERTICAL + "'");
    }

    conf.setInt("mapreduce.input.linerecordreader.line.maxlength", 1024 * 1024);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.yahoo.storm.yarn.TestIntegration.java

License:Open Source License

@SuppressWarnings({ "rawtypes", "unchecked" })
@BeforeClass
public static void setup() {
    try {
        zkServer = new EmbeddedZKServer();
        zkServer.start();

        LOG.info("Starting up MiniYARN cluster");
        if (yarnCluster == null) {
            yarnCluster = new MiniYARNCluster(TestIntegration.class.getName(), 2, 1, 1);
            Configuration conf = new YarnConfiguration();
            conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
            conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 2 * 1024);
            yarnCluster.init(conf);
            yarnCluster.start();
        }
        sleep(2000);

        Configuration miniyarn_conf = yarnCluster.getConfig();
        yarn_site_xml = testConf.createYarnSiteConfig(miniyarn_conf);

        storm_home = testConf.stormHomePath();
        LOG.info("Will be using storm found on PATH at " + storm_home);

        // create a Storm configuration file with the ZooKeeper port
        final Map storm_conf = Config.readStormConfig();
        storm_conf.put(backtype.storm.Config.STORM_ZOOKEEPER_PORT, zkServer.port());
        storm_conf_file = testConf.createConfigFile(storm_conf);

        List<String> cmd = java.util.Arrays.asList("bin/storm-yarn", "launch", storm_conf_file.toString(),
                "--stormZip", "lib/storm.zip", "--appname", "storm-on-yarn-test", "--output",
                "target/appId.txt");
        execute(cmd);

        // wait for the Storm cluster to be fully launched
        sleep(15000);

        BufferedReader reader = new BufferedReader(new FileReader("target/appId.txt"));
        appId = reader.readLine();
        reader.close();
        if (appId != null)
            appId = appId.trim();
        LOG.info("application ID:" + appId);
    } catch (Exception ex) {
        LOG.error("setup failure", ex);
        Assert.assertEquals(null, ex);
    }
}

From source file:com.zjy.mongo.util.MongoConfigUtil.java

License:Apache License

/**
 * Set the maximum number of documents that should be loaded into memory
 * and sent in a batch to MongoDB as the output of a job.
 * @param conf the Configuration
 * @param size the number of documents
 */
public static void setBatchSize(final Configuration conf, final int size) {
    conf.setInt(OUTPUT_BATCH_SIZE, size);
}
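
As a hedged usage sketch (the driver method and the batch value of 500 are illustrative, not taken from the project; the usual org.apache.hadoop.mapreduce.Job import is assumed), a caller might apply this helper when building a job:

// Hypothetical driver snippet: set the output batch size before creating the job.
public static Job buildOutputJob(Configuration conf) throws IOException {
    MongoConfigUtil.setBatchSize(conf, 500); // 500 documents per batch, arbitrary example value
    return Job.getInstance(conf);
}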

From source file:com.zjy.mongo.util.MongoConfigUtil.java

License:Apache License

public static Configuration buildConfiguration(final Map<String, Object> data) {
    Configuration newConf = new Configuration();
    for (Entry<String, Object> entry : data.entrySet()) {
        String key = entry.getKey();
        Object val = entry.getValue();
        if (val instanceof String) {
            newConf.set(key, (String) val);
        } else if (val instanceof Boolean) {
            newConf.setBoolean(key, (Boolean) val);
        } else if (val instanceof Integer) {
            newConf.setInt(key, (Integer) val);
        } else if (val instanceof Float) {
            newConf.setFloat(key, (Float) val);
        } else if (val instanceof DBObject) {
            setDBObject(newConf, key, (DBObject) val);
        } else {
            throw new RuntimeException("can't convert " + val.getClass() + " into any type for Configuration");
        }
    }
    return newConf;
}
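
A possible way to drive buildConfiguration, with hypothetical key names chosen purely for illustration:

// Hypothetical usage: each map value is dispatched to the matching Configuration setter.
Map<String, Object> settings = new HashMap<String, Object>();
settings.put("mongo.job.name", "example-job");    // String  -> set
settings.put("mongo.job.verbose", Boolean.TRUE);  // Boolean -> setBoolean
settings.put("mongo.output.batch.size", 500);     // Integer -> setInt
Configuration conf = MongoConfigUtil.buildConfiguration(settings);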

From source file:Compress.TestLZO.java

License:Open Source License

public static void main(String[] argv) throws IOException {
    System.out.println(System.getProperty("java.library.path"));

    Configuration conf = new Configuration();

    conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);

    LzoCodec codec = new LzoCodec();
    codec.setConf(conf);

    OutputStream out = new DataOutputBuffer();
    CompressionOutputStream out2 = codec.createOutputStream(out);

    byte[] str2 = new byte[20];

    int num = 10000;
    int before = 0;
    String part = "hello konten hello konten";
    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out2.write(str2, 0, 8);

    }
    out2.finish();

    byte[] buffer = ((DataOutputBuffer) out).getData();

    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out).getLength());

    InputStream in = new DataInputBuffer();
    ((DataInputBuffer) in).reset(((DataOutputBuffer) out).getData(), 0, ((DataOutputBuffer) out).getLength());

    CompressionInputStream in2 = codec.createInputStream(in);

    byte[] buf = new byte[100];
    for (long i = 0; i < num; i++) {
        int count = 0;
        count = in2.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            } else if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }

        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }

    in2.close();

    System.out.println("test compress array...");

    OutputStream out3 = new DataOutputBuffer();
    CompressionOutputStream out4 = codec.createOutputStream(out3);

    DataOutputBuffer tout3 = new DataOutputBuffer();

    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out4.write(str2, 0, 8);
    }
    out4.finish();

    buffer = ((DataOutputBuffer) out3).getData();

    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out3).getLength());

    InputStream in3 = new DataInputBuffer();
    ((DataInputBuffer) in3).reset(((DataOutputBuffer) out3).getData(), 0,
            ((DataOutputBuffer) out3).getLength());

    CompressionInputStream in4 = codec.createInputStream(in3);

    for (long i = 0; i < num; i++) {
        int count = 0;
        count = in4.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            }

            if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }

        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }

    in4.close();

}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java

License:Apache License

/**
 * pass1: generate collocations, ngrams
 */
private static long generateCollocations(Path input, Path output, Configuration baseConf, boolean emitUnigrams,
        int maxNGramSize, int reduceTasks, int minSupport, Window mode, int winsize)
        throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);
    con.set(WINDOW_TYPE, mode.toString());
    con.setInt(WINDOW_SIZE, winsize);

    if (mode.toString().equalsIgnoreCase("DOCUMENT")) {
        con.setInt("mapred.job.map.memory.mb", 3000);

        con.set("mapred.child.java.opts", "-Xmx2900M");
        con.set("mapred.reduce.child.java.opts", "-Xmx8000M");

        con.setInt("mapred.job.reduce.memory.mb", 8120);
    } else {
        con.setInt("mapred.job.map.memory.mb", 2000);

        con.set("mapred.child.java.opts", "-Xmx1900M");
        con.set("mapred.reduce.child.java.opts", "-Xmx2900M");

        con.setInt("mapred.job.reduce.memory.mb", 3000);
    }
    con.setBoolean("mapred.compress.map.output", true);
    con.setStrings("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    con.setBoolean("mapred.compress.output", true);
    con.setStrings("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    con.setInt("mapred.task.timeout", 6000000);
    con.setInt("io.sort.factor", 50);
    con.setInt("mapreduce.map.tasks", 256);
    con.setInt("dfs.replication", 1);
    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(512);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java

License:Apache License

/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(AssocReducer.MIN_VALUE, minValue);
    conf.setInt("mapred.job.map.memory.mb", 1280);
    conf.setInt("mapred.job.reduce.memory.mb", 2560);
    conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
    conf.setInt("mapred.task.timeout", 6000000);
    conf.set(AssocReducer.ASSOC_METRIC, "llr");

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.setReducerClass(AssocReducer.class);
    job.setNumReduceTasks(reduceTasks);
    // Defines additional single text based output 'text' for the job
    MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

    // Defines additional multi sequencefile based output 'sequence' for the
    // job
    MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}

From source file:eagle.service.hbase.EmbeddedHbase.java

License:Apache License

public void start() {
    try {
        util = new HBaseTestingUtility();
        Configuration conf = util.getConfiguration();
        conf.setInt("test.hbase.zookeeper.property.clientPort", port);
        conf.set("zookeeper.znode.parent", znode);
        conf.setInt("hbase.zookeeper.property.maxClientCnxns", 200);
        conf.setInt("hbase.master.info.port", -1);//avoid port clobbering
        // start mini hbase cluster
        hBaseCluster = util.startMiniCluster();
        Configuration config = hBaseCluster.getConf();

        config.set("zookeeper.session.timeout", "120000");
        config.set("hbase.zookeeper.property.tickTime", "6000");
        config.set(HConstants.HBASE_CLIENT_PAUSE, "3000");
        config.set(HConstants.HBASE_CLIENT_RETRIES_NUMBER, "1");
        config.set(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, "60000");

        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                shutdown();
            }
        });
    } catch (Throwable t) {
        LOG.error("Got an exception: ", t);
    }
}

From source file:edu.berkeley.chukwa_xtrace.TestXtrExtract.java

License:Apache License

public void testArchiving() throws Exception {

    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true);//nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);

    System.out.println("done!");
}

From source file:edu.cuhk.hccl.hadoop.HadoopApp.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args == null || args.length < 6) {
        System.out.println("Please specify parameters: input, output, domain, num-reducers, similarity, range!");
        System.exit(-1);
    }

    String input = args[0];
    String output = args[1];
    String domain = args[2];
    int numReducers = Integer.parseInt(args[3]);
    float similarity = Float.parseFloat(args[4]);
    int range = Integer.parseInt(args[5]);

    Job job = new Job(new Configuration(), this.getClass().getSimpleName());

    // Must below the line of job creation
    Configuration conf = job.getConfiguration();
    // Reuse the JVM
    conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
    conf.setFloat("SIM_THRESHOLD", similarity);
    conf.setInt("SEARCH_RANGE", range);

    if (domain.equalsIgnoreCase("restaurant")) {
        conf.setStrings("ASPECTS", Constant.RESTAURANT_ASPECTS);
        job.setMapperClass(YelpMapper.class);
        job.setInputFormatClass(TextInputFormat.class);

        // args[4] is the business file to select matching business_ids to restaurant
        String busiFile = args[6];
        DistributedCache.addCacheFile(new URI(busiFile), conf);
    } else if (domain.equalsIgnoreCase("hotel")) {
        conf.setStrings("ASPECTS", Constant.TRIPADVISOR_ASPECTS);
        job.setMapperClass(TripAdvisorMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
    } else {
        System.out.println("Wrong domain type!");
        System.exit(-1);
    }

    job.setJarByClass(HadoopApp.class);
    job.setReducerClass(ReviewReducer.class);
    job.setNumReduceTasks(numReducers);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(UserItemPair.class);
    job.setOutputValueClass(NounPhrase.class);

    // Delete output if exists
    Path outputDir = new Path(output);
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir))
        hdfs.delete(outputDir, true);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.waitForCompletion(true);
    return 0;
}