Example usage for org.apache.hadoop.conf Configuration setLong

List of usage examples for org.apache.hadoop.conf Configuration setLong

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration setLong.

Prototype

public void setLong(String name, long value) 

Document

Set the value of the name property to a long.
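
Before the examples from real projects, here is a minimal, self-contained sketch of the usual pattern: store a long with setLong and read it back with getLong, which takes a default for the case where the property is unset. The property names and values below are illustrative only and are not taken from any of the sources quoted on this page.

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a long value under an illustrative property name.
        conf.setLong("example.num.records", 123456789L);

        // Read it back; the second argument is the default returned
        // when the property is not set.
        long numRecords = conf.getLong("example.num.records", 0L);
        long missing = conf.getLong("example.not.set", -1L);

        System.out.println(numRecords); // prints 123456789
        System.out.println(missing);    // prints -1
    }
}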

Usage

From source file:com.github.ygf.pagerank.PageRank.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Usage: PageRank <links-simple-sorted.txt> <titles-dir> <output-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path linksFile = new Path(args[0]);
    Path titlesDir = new Path(args[1]);
    Path outputDir = new Path(args[2]);

    Configuration conf = getConf();

    // Do not create _SUCCESS files: MapFileOutputFormat.getReaders
    // tries to read _SUCCESS as another MapFile directory.
    conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false");

    // Default values of the parameters of the algorithm.
    conf.setInt("pagerank.block_size", conf.getInt("pagerank.block_size", 10000));
    conf.setInt("pagerank.max_iterations", conf.getInt("pagerank.max_iterations", 2));
    conf.setFloat("pagerank.damping_factor", conf.getFloat("pagerank.damping_factor", 0.85f));
    conf.setInt("pagerank.top_results", conf.getInt("pagerank.top_results", 100));

    conf.set("pagerank.titles_dir", titlesDir.toString());
    int numPages = getNumPages(conf, titlesDir);
    conf.setLong("pagerank.num_pages", numPages);

    createTransitionMatrix(conf, linksFile, outputDir);

    int maxIters = Integer.parseInt(conf.get("pagerank.max_iterations"));
    for (int iter = 1; iter <= maxIters; iter++) {
        conf.setInt("pagerank.iteration", iter);
        pageRankIteration(iter, conf, outputDir);
        cleanPreviousIteration(iter, conf, outputDir);
    }

    summarizeResults(maxIters, conf, outputDir);

    return 0;
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemIntegrationTest.java

License:Open Source License

/**
 * Validates that we correctly build our Options object
 * from a Hadoop config.
 */
@Test
public void testBuildOptionsFromConfig() throws IOException {
    GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem();
    Configuration config = loadConfig("projectId", "serviceAccount", "priveKeyFile");

    GoogleCloudStorageFileSystemOptions.Builder optionsBuilder = fs.createOptionsBuilderFromConfig(config);
    GoogleCloudStorageFileSystemOptions options = optionsBuilder.build();
    GoogleCloudStorageOptions gcsOptions = options.getCloudStorageOptions();

    Assert.assertTrue(gcsOptions.isAutoRepairImplicitDirectoriesEnabled());
    Assert.assertFalse(gcsOptions.isInferImplicitDirectoriesEnabled());

    config.setBoolean(GoogleHadoopFileSystemBase.GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_KEY, false);
    config.setBoolean(GoogleHadoopFileSystemBase.GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_KEY, true);
    config.setLong(GoogleHadoopFileSystemBase.GCS_METADATA_CACHE_MAX_ENTRY_AGE_KEY, 2222L);
    config.setLong(GoogleHadoopFileSystemBase.GCS_METADATA_CACHE_MAX_INFO_AGE_KEY, 1111L);

    optionsBuilder = fs.createOptionsBuilderFromConfig(config);
    options = optionsBuilder.build();
    Assert.assertEquals(2222L, options.getCacheMaxEntryAgeMillis());
    Assert.assertEquals(1111L, options.getCacheMaxInfoAgeMillis());

    gcsOptions = options.getCloudStorageOptions();
    Assert.assertFalse(gcsOptions.isAutoRepairImplicitDirectoriesEnabled());
    Assert.assertTrue(gcsOptions.isInferImplicitDirectoriesEnabled());
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemIntegrationTest.java

License:Open Source License

/**
 * Validates success path in initialize().
 */
@Test
@Override
public void testInitializeSuccess() throws IOException, URISyntaxException {
    GoogleHadoopFileSystem fs = null;

    // Reuse loadConfig() to initialize auth related settings.
    Configuration config = loadConfig();

    // Set up remaining settings to known test values.
    int bufferSize = 512;
    config.setInt(GoogleHadoopFileSystemBase.BUFFERSIZE_KEY, bufferSize);
    long blockSize = 1024;
    config.setLong(GoogleHadoopFileSystemBase.BLOCK_SIZE_KEY, blockSize);
    String systemBucketName = ghfsHelper.getUniqueBucketName("-system-bucket");
    String rootBucketName = ghfsHelper.getUniqueBucketName("-root-bucket");
    config.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, systemBucketName);

    URI initUri = (new Path("gs://" + rootBucketName)).toUri();
    try {
        fs = new GoogleHadoopFileSystem();
        fs.initialize(initUri, config);
    } catch (IOException e) {
        Assert.fail("Unexpected exception");
    }

    // Verify that config settings were set correctly.
    Assert.assertEquals(bufferSize, fs.getBufferSizeOverride());
    Assert.assertEquals(blockSize, fs.getDefaultBlockSize());
    Assert.assertEquals(systemBucketName, fs.getSystemBucketName());
    Assert.assertEquals(initUri, fs.initUri);
    Assert.assertEquals(rootBucketName, fs.getRootBucketName());

    initUri = (new Path("gs:/foo")).toUri();
    try {
        fs = new GoogleHadoopFileSystem();
        fs.initialize(initUri, config);
    } catch (IOException e) {
        Assert.fail("Unexpected exception");
    }

    // Verify that config settings were set correctly.
    Assert.assertEquals(bufferSize, fs.getBufferSizeOverride());
    Assert.assertEquals(blockSize, fs.getDefaultBlockSize());
    Assert.assertEquals(systemBucketName, fs.getSystemBucketName());
    Assert.assertEquals(initUri, fs.initUri);
    Assert.assertEquals(systemBucketName, fs.getRootBucketName());
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystemIntegrationTest.java

License:Open Source License

/**
 * Validates success path in initialize().
 */
@Test
@Override
public void testInitializeSuccess() throws IOException, URISyntaxException {
    GoogleHadoopFileSystemBase fs = null;

    // Reuse loadConfig() to initialize auth related settings.
    Configuration config = loadConfig();

    // Set up remaining settings to known test values.
    int bufferSize = 512;
    config.setInt(GoogleHadoopFileSystemBase.BUFFERSIZE_KEY, bufferSize);
    long blockSize = 1024;
    config.setLong(GoogleHadoopFileSystemBase.BLOCK_SIZE_KEY, blockSize);
    String systemBucketName = ghfsHelper.getUniqueBucketName("-system-bucket");
    config.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, systemBucketName);

    URI initUri = (new Path("gsg://bucket-should-be-ignored")).toUri();
    try {
        fs = new GoogleHadoopGlobalRootedFileSystem();
        fs.initialize(initUri, config);
    } catch (IOException e) {
        Assert.fail("Unexpected exception");
    }

    // Verify that config settings were set correctly.
    Assert.assertEquals(bufferSize, fs.getBufferSizeOverride());
    Assert.assertEquals(blockSize, fs.getDefaultBlockSize());
    Assert.assertEquals(systemBucketName, fs.getSystemBucketName());
    Assert.assertEquals(initUri, fs.initUri);
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    //verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();

            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}

From source file:com.hdfs.concat.clean.TestClean.java

License:Apache License

@Test
public void testAge() throws Exception {
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(new Path(ROOT_DIR, "a"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "a"), "1"));
    fs.mkdirs(new Path(ROOT_DIR, "b"));
    fs.mkdirs(new Path(ROOT_DIR, "c"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "c"), "2"));

    Path oldFile = new Path(new Path(new Path(ROOT_DIR, "a"), "1"), "oldfile");
    FSDataOutputStream out = fs.create(oldFile);
    out.write("bla".getBytes());
    out.close();

    Path cFile = new Path(new Path(new Path(ROOT_DIR, "c"), "1"), "cfile");
    FSDataOutputStream out2 = fs.create(cFile);
    out2.write("wah".getBytes());
    out2.close();

    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    Clean cleanWarn = new Clean();
    Configuration warnConf = createJobConf();
    warnConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    warnConf.set(Clean.TARGET_EXPR, "cfile");
    warnConf.set(Clean.WARN_MODE, "true");
    ToolRunner.run(warnConf, cleanWarn, new String[] {});
    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    Clean cleanReg = new Clean();
    Configuration regConf = createJobConf();
    regConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    regConf.set(Clean.TARGET_EXPR, "cfile");
    ToolRunner.run(regConf, cleanReg, new String[] {});
    assertEquals(false, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    Clean clean = new Clean();
    Configuration cleanConf = createJobConf();
    cleanConf.setLong(Clean.CUTTOFF_MILLIS, 20000);
    cleanConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf, clean, new String[] {});
    assertEquals(true, fs.exists(oldFile));
    Thread.sleep(3);

    Clean clean2 = new Clean();
    Configuration cleanConf2 = createJobConf();
    cleanConf2.setLong(Clean.CUTTOFF_MILLIS, 1);
    cleanConf2.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf2, clean2, new String[] {});
    assertEquals(false, fs.exists(oldFile));

}

From source file:com.hdfs.concat.clean.TestClean.java

License:Apache License

@Test
public void testNegatives() throws Exception {
    Clean clean = new Clean();
    Configuration cleanConf = createJobConf();
    cleanConf.setLong(Clean.CUTTOFF_MILLIS, 20000);
    cleanConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    cleanConf.set(Clean.TARGET_EXPR, "bla");
    int res = ToolRunner.run(cleanConf, clean, new String[] {});
    assertEquals(9, res);
}

From source file:com.inclouds.hbase.utils.ConfigHelper.java

License:Open Source License

/**
 * Creates default Configuration with no persistence,
 * no compression but with Global and Cache max memory set.
 *
 * @param maxGlobalSize the max global size
 * @param maxSize the max size
 * @return the default config
 */
public static Configuration getDefaultConfig(long maxGlobalSize, long maxSize) {
    Configuration cfg = new Configuration();
    cfg.setLong(CacheConfiguration.MAX_GLOBAL_MEMORY, maxGlobalSize);
    cfg.setLong(CacheConfiguration.MAX_MEMORY, maxSize);
    return cfg;
}
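
For context, a hypothetical call to this helper might look like the following; the sizes are arbitrary example values, not defaults taken from the library:

// Hypothetical usage: 1 GB global cache limit, 256 MB per-cache limit.
Configuration cacheCfg = ConfigHelper.getDefaultConfig(1024L * 1024 * 1024, 256L * 1024 * 1024);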

From source file:com.inclouds.hbase.utils.ConfigHelper.java

License:Open Source License

/**
 * Creates default Configuration with persistence and compression,
 * and with Global and Cache max memory set.
 *
 * @param maxGlobalSize the max global size
 * @param maxSize the max size
 * @param mode the mode
 * @param codec the codec
 * @return the default config more
 */
public static Configuration getDefaultConfigMore(long maxGlobalSize, long maxSize, PersistenceMode mode,
        CodecType codec) {
    Configuration cfg = new Configuration();
    cfg.setLong(CacheConfiguration.MAX_GLOBAL_MEMORY, maxGlobalSize);
    cfg.setLong(CacheConfiguration.MAX_MEMORY, maxSize);
    cfg.set(CacheConfiguration.COMPRESSION, codec.toString());
    //cfg.set(name, value);
    return cfg;
}

From source file:com.inmobi.conduit.distcp.ConduitDistCp.java

License:Apache License

@Override
protected Path createInputFileListing(Job job) throws IOException {
    // get the file path where copy listing file has to be saved
    Path fileListingPath = getFileListingPath();
    Configuration config = job.getConfiguration();

    SequenceFile.Writer fileListWriter = null;
    try {
        fileListWriter = SequenceFile.createWriter(fileListingPath.getFileSystem(config), config,
                fileListingPath, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE);

        for (Map.Entry<String, FileStatus> entry : fileListingMap.entrySet()) {
            FileStatus status = FileUtil.getFileStatus(entry.getValue(), buffer, in);
            fileListWriter.append(new Text(entry.getKey()), status);

            // Create a sync point after each entry. This will ensure that SequenceFile
            // Reader can work at file entry level granularity, given that SequenceFile
            // Reader reads from the starting of sync point.
            fileListWriter.sync();

            totalBytesToCopy += entry.getValue().getLen();
            totalPaths++;
        }
    } finally {
        if (fileListWriter != null) {
            fileListWriter.close();
        }
    }

    LOG.info("Number of paths considered for copy: " + totalPaths);
    LOG.info("Number of bytes considered for copy: " + totalBytesToCopy
            + " (Actual number of bytes copied depends on whether any files are " + "skipped or overwritten.)");

    // set distcp configurations
    config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, fileListingPath.toString());
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalBytesToCopy);
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, totalPaths);

    return fileListingPath;
}