List of usage examples for org.apache.hadoop.conf Configuration setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
Parameters: name – the property name to set; value – the long value of the property.
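Before the source-file examples, here is a minimal, self-contained sketch of the call; the property key used below is made up for illustration and is not a standard Hadoop key. It stores a long under a name and reads it back with getLong, which returns the supplied default when the key is unset.

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // "example.scan.interval.ms" is an illustrative key, not a standard Hadoop property
        conf.setLong("example.scan.interval.ms", 10L * 60 * 1000);
        // getLong returns the stored value, or the default (0L here) if the key is unset
        long interval = conf.getLong("example.scan.interval.ms", 0L);
        System.out.println(interval); // prints 600000
    }
}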
From source file:de.l3s.content.timex.extracting.ClueWeb09TimexWriteToHDFS.java
License:Apache License
/** Runs this tool. */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("input").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("output").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    cmdline = parser.parse(options, args);
    if (!cmdline.hasOption(INPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    if (!cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    LOG.info("Tool name: " + ClueWeb09TimexWriteToHDFS.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    Configuration conf = new Configuration();
    long milliSeconds = 10000 * 60 * 60; // 10,000 * 60 * 60 ms = 10 hours (default task timeout is 10 minutes)
    conf.setLong("mapred.task.timeout", milliSeconds);
    Job job = Job.getInstance(conf, "extract CW tempex and output to HDFS");
    job.setJarByClass(ClueWeb09TimexWriteToHDFS.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(ClueWeb09InputFormat.class);
    job.setMapperClass(TMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.waitForCompletion(true);
    return 0;
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/** pass2: perform the LLR calculation */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(AssocReducer.MIN_VALUE, minValue);
    conf.setInt("mapred.job.map.memory.mb", 1280);
    conf.setInt("mapred.job.reduce.memory.mb", 2560);
    conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
    conf.setInt("mapred.task.timeout", 6000000);
    conf.set(AssocReducer.ASSOC_METRIC, "llr");

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.setReducerClass(AssocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    // Defines additional single text based output 'text' for the job
    MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);
    // Defines additional multi sequencefile based output 'sequence' for the job
    MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.standalone.WarcBoilerplateRemoval.java
License:Apache License
public static void processWarcGzFile(File input, File outFile, boolean keepMinimalHtml) throws IOException {
    System.out.printf("Reading from %s, writing to %s%n", input, outFile);

    Configuration conf = new Configuration();
    // set limit to 100 GB (= almost unlimited)
    conf.setLong("warc.output.segment.size", WARCFileWriter.DEFAULT_MAX_SEGMENT_SIZE * 100);

    // Opens a file for reading.
    CompressionCodec codec = WARCFileWriter.getGzipCodec(conf);
    InputStream byteStream = new BufferedInputStream(new FileInputStream(input));
    DataInputStream dataStream = new DataInputStream(
            codec == null ? byteStream : codec.createInputStream(byteStream));

    BoilerPlateRemoval boilerPlateRemoval = new JusTextBoilerplateRemoval();

    long startTime = System.currentTimeMillis();
    int counter = 0;
    int recordsRead = 0;

    Path outputPath = new Path(outFile.getAbsolutePath());
    WARCFileWriter warcFileWriter = new WARCFileWriter(conf, codec, outputPath);

    // detecting the correct charset
    final CharsetDetector charsetDetector = new ICUCharsetDetectorWrapper();

    while (true) {
        try {
            // Reads the next record from the file.
            WARCRecord wc = new WARCRecord(dataStream);

            // detect charset
            byte[] bytes = wc.getContent();
            Charset charset = charsetDetector.detectCharset(bytes);
            String html = new String(bytes, charset);

            // strip HTTP header
            html = html.substring(html.indexOf("\r\n\r\n") + 4);

            String plainText;
            if (keepMinimalHtml) {
                plainText = boilerPlateRemoval.getMinimalHtml(html, null);
            } else {
                plainText = boilerPlateRemoval.getPlainText(html, null);
            }

            counter++;
            if (counter % 100 == 0) {
                System.out.printf(Locale.ENGLISH, "~%.1f entries per second%n",
                        counter * 1000f / (double) (System.currentTimeMillis() - startTime));
                System.out.printf(Locale.ENGLISH, "%d records processed%n", recordsRead);
            }
            recordsRead++;

            // create copy of WarcRecord
            WARCRecord newWarcRecord = new WARCRecord(wc);
            newWarcRecord.setContent(plainText);

            warcFileWriter.write(newWarcRecord);
        } catch (EOFException e) {
            break;
        }
    }
    warcFileWriter.close();

    // rename from out.warc.gz.seg-00000.warc.gz to out.warc.gz
    File actualOutputFile = new File(outFile.getAbsolutePath() + ".seg-00000.warc.gz");
    if (!actualOutputFile.exists()) {
        throw new IOException("File " + actualOutputFile + " does not exist");
    }
    if (!actualOutputFile.renameTo(outFile)) {
        throw new IOException("Renaming file " + actualOutputFile + " to " + outFile + " failed");
    }

    // delete .crc file
    File crcFile = new File(actualOutputFile.getParentFile(), "." + actualOutputFile.getName() + ".crc");
    if (!crcFile.delete()) {
        throw new IOException(crcFile + " was not deleted");
    }

    System.out.printf(Locale.ENGLISH, "%d records written to %s, total time %f%n", recordsRead,
            outFile.getName(), counter * 1000f / (double) (System.currentTimeMillis() - startTime));
}
From source file:edu.rosehulman.CollocDriver.java
License:Apache License
/** pass2: perform the LLR calculation */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minLLRValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(LLRReducer.MIN_LLR, minLLRValue);

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(LLRReducer.class);
    job.setNumReduceTasks(reduceTasks);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:hadoop.SleepJob.java
License:Apache License
public Job createJob(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) throws IOException {
    Configuration conf = getConf();
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    Job job = new Job(conf, "sleep");
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(SleepJob.class);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(SleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
}
From source file:hudson.gridmaven.gridlayer.DataNodeStartTask.java
License:Open Source License
public Void call() throws IOException {
    System.out.println("Starting data node");
    //System.setProperty("java.net.preferIPv4Stack", "true");
    Configuration conf = new Configuration();
    conf.set("fs.default.name", hdfsUrl);
    conf.set("dfs.data.dir", new File(new File(rootPath), "hadoop/datanode").getAbsolutePath());
    conf.set("dfs.datanode.address", "0.0.0.0:0");
    conf.set("dfs.datanode.http.address", "0.0.0.0:0");
    conf.set("dfs.datanode.ipc.address", "0.0.0.0:0");
    conf.set("slave.host.name", slaveHostName);
    conf.set("dfs.safemode.extension", "1");
    conf.set("dfs.namenode.logging.level", "ALL");
    conf.set("dfs.block.size", "1048576");

    // TODO: make this configurable
    // make room for builds
    conf.setLong("dfs.datanode.du.reserved", 1L * 1024 * 1024 * 1024);

    DataNode dn = DataNode.instantiateDataNode(new String[0], conf);
    DataNode.runDatanodeDaemon(dn);
    return null;
}
From source file:hudson.plugins.hadoop.DataNodeStartTask.java
License:Open Source License
public Void call() throws IOException {
    System.out.println("Starting data node");

    Configuration conf = new Configuration();
    conf.set("fs.default.name", hdfsUrl);
    conf.set("dfs.data.dir", new File(new File(rootPath), "hadoop/datanode").getAbsolutePath());
    conf.set("dfs.datanode.address", "0.0.0.0:0");
    conf.set("dfs.datanode.http.address", "0.0.0.0:0");
    conf.set("dfs.datanode.ipc.address", "0.0.0.0:0");
    conf.set("slave.host.name", slaveHostName);

    // TODO: make this configurable
    // make room for builds
    conf.setLong("dfs.datanode.du.reserved", 10L * 1024 * 1024 * 1024);

    DataNode dn = DataNode.instantiateDataNode(new String[0], conf);
    DataNode.runDatanodeDaemon(dn);
    return null;
}
From source file:io.covert.dns.storage.accumulo.AccumuloStorageModuleFactory.java
License:Apache License
public static void configure(Job job, String inst, String zooKeepers, String user, String password,
        long maxMemory, long maxLatency, int maxWriteThreads,
        Collection<Class<? extends MutationGeneratorFactory>> generatorFactoryClasses) {
    Configuration conf = job.getConfiguration();

    StringBuilder factories = new StringBuilder();
    boolean first = true;
    for (Class<? extends MutationGeneratorFactory> clz : generatorFactoryClasses) {
        if (first) {
            first = false;
            factories.append(clz.getName());
        } else {
            factories.append(",").append(clz.getName());
        }
    }

    conf.set("storage.module.factory", AccumuloStorageModuleFactory.class.getName());
    conf.set("accumulo.storage.module.mutation.generator.factories", factories.toString());
    conf.set("accumulo.storage.module.instance.name", inst);
    conf.set("accumulo.storage.module.zookeepers", zooKeepers);
    conf.set("accumulo.storage.module.user", user);
    conf.set("accumulo.storage.module.password", password);
    conf.setLong("accumulo.storage.module.max.memory", maxMemory);
    conf.setLong("accumulo.storage.module.max.latency", maxLatency);
    conf.setInt("accumulo.storage.module.max.write.threads", maxWriteThreads);
}
From source file:io.dataapps.chlorine.hadoop.HDFSScanMR.java
License:Apache License
public static Job makeJob(Configuration conf, Path in, Path out, String matchPath, long scanSince,
        String chlorineConfigFilePath, String queue, String maskPath) throws IOException {
    conf.setBoolean("mapred.output.compress", false);
    conf.setLong("scanSince", scanSince);
    conf.set("matchPath", matchPath);
    conf.set("maskPath", maskPath);
    conf.set("inputPath", in.toString());
    if (queue != null) {
        conf.set("mapred.job.queue.name", queue);
    }
    conf.set("fs.permissions.umask-mode", "007");
    conf.setInt("input_path_depth", in.depth());

    Job job = Job.getInstance(conf, "Chlorine_HDFS_Scan");
    job.setJarByClass(HDFSScanMR.class);
    if (chlorineConfigFilePath != null) {
        try {
            job.addCacheFile(new URI(chlorineConfigFilePath));
            conf.set("finder_file", (new File(chlorineConfigFilePath)).getName());
        } catch (URISyntaxException e) {
            LOG.error(e);
        }
    }
    job.setMapperClass(DeepScanMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, in);
    TextInputFormat.setInputDirRecursive(job, true);
    TextInputFormat.setInputPathFilter(job, NewFilesFilter.class);
    FileOutputFormat.setOutputPath(job, out);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    return job;
}
From source file:io.hops.security.HopsUtil.java
License:Apache License
private static Configuration generateSSLServerConf(Configuration conf, String cryptoMaterialPassword) {
    Configuration sslConf = new Configuration(false);
    sslConf.set(
            FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
                    FileBasedKeyStoresFactory.SSL_KEYSTORE_LOCATION_TPL_KEY),
            HopsSSLSocketFactory.LOCALIZED_KEYSTORE_FILE_NAME);
    sslConf.set(FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
            FileBasedKeyStoresFactory.SSL_KEYSTORE_PASSWORD_TPL_KEY), cryptoMaterialPassword);
    sslConf.set(FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
            FileBasedKeyStoresFactory.SSL_KEYSTORE_KEYPASSWORD_TPL_KEY), cryptoMaterialPassword);
    sslConf.set(
            FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
                    FileBasedKeyStoresFactory.SSL_TRUSTSTORE_LOCATION_TPL_KEY),
            HopsSSLSocketFactory.LOCALIZED_TRUSTSTORE_FILE_NAME);
    sslConf.set(FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
            FileBasedKeyStoresFactory.SSL_TRUSTSTORE_PASSWORD_TPL_KEY), cryptoMaterialPassword);
    sslConf.set(
            FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
                    FileBasedKeyStoresFactory.SSL_PASSWORDFILE_LOCATION_TPL_KEY),
            HopsSSLSocketFactory.LOCALIZED_PASSWD_FILE_NAME);

    Configuration sslClientConf = new Configuration(false);
    String sslClientResource = conf.get(SSLFactory.SSL_CLIENT_CONF_KEY, "ssl-client.xml");
    sslClientConf.addResource(sslClientResource);
    long keyStoreReloadInterval = sslClientConf.getLong(
            FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.CLIENT,
                    FileBasedKeyStoresFactory.SSL_KEYSTORE_RELOAD_INTERVAL_TPL_KEY),
            FileBasedKeyStoresFactory.DEFAULT_SSL_KEYSTORE_RELOAD_INTERVAL);
    String timeUnitStr = sslClientConf.get(
            FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.CLIENT,
                    FileBasedKeyStoresFactory.SSL_KEYSTORE_RELOAD_TIMEUNIT_TPL_KEY),
            FileBasedKeyStoresFactory.DEFAULT_SSL_KEYSTORE_RELOAD_TIMEUNIT);
    long trustStoreReloadInterval = sslClientConf.getLong(
            FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.CLIENT,
                    FileBasedKeyStoresFactory.SSL_TRUSTSTORE_RELOAD_INTERVAL_TPL_KEY),
            FileBasedKeyStoresFactory.DEFAULT_SSL_TRUSTSTORE_RELOAD_INTERVAL);

    sslConf.setLong(FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
            FileBasedKeyStoresFactory.SSL_KEYSTORE_RELOAD_INTERVAL_TPL_KEY), keyStoreReloadInterval);
    sslConf.set(FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
            FileBasedKeyStoresFactory.SSL_KEYSTORE_RELOAD_TIMEUNIT_TPL_KEY), timeUnitStr);
    sslConf.setLong(
            FileBasedKeyStoresFactory.resolvePropertyName(SSLFactory.Mode.SERVER,
                    FileBasedKeyStoresFactory.SSL_TRUSTSTORE_RELOAD_INTERVAL_TPL_KEY),
            trustStoreReloadInterval);

    return sslConf;
}