List of usage examples for org.apache.hadoop.conf Configuration setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
Parameters: name - the property name; value - the long value of the property.
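Before the full examples below, here is a minimal sketch of the usual setLong/getLong round trip. The property key "example.cache.ttl.ms" is a made-up name used only for illustration, not a real Hadoop key.

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a long-valued property; Configuration keeps it internally as a String.
        conf.setLong("example.cache.ttl.ms", 60000L);
        // Read it back, supplying a default that is used when the key is absent.
        long ttl = conf.getLong("example.cache.ttl.ms", 30000L);
        System.out.println("ttl = " + ttl); // prints: ttl = 60000
    }
}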
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Usage: PageRank <links-simple-sorted.txt> <titles-dir> <output-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path linksFile = new Path(args[0]);
    Path titlesDir = new Path(args[1]);
    Path outputDir = new Path(args[2]);

    Configuration conf = getConf();

    // Do not create _SUCCESS files. MapFileOutputFormat.getReaders calls
    // try to read the _SUCCESS as another MapFile dir.
    conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false");

    // Default values of the parameters of the algorithm.
    conf.setInt("pagerank.block_size", conf.getInt("pagerank.block_size", 10000));
    conf.setInt("pagerank.max_iterations", conf.getInt("pagerank.max_iterations", 2));
    conf.setFloat("pagerank.damping_factor", conf.getFloat("pagerank.damping_factor", 0.85f));
    conf.setInt("pagerank.top_results", conf.getInt("pagerank.top_results", 100));

    conf.set("pagerank.titles_dir", titlesDir.toString());
    int numPages = getNumPages(conf, titlesDir);
    conf.setLong("pagerank.num_pages", numPages);

    createTransitionMatrix(conf, linksFile, outputDir);

    int maxIters = Integer.parseInt(conf.get("pagerank.max_iterations"));
    for (int iter = 1; iter <= maxIters; iter++) {
        conf.setInt("pagerank.iteration", iter);
        pageRankIteration(iter, conf, outputDir);
        cleanPreviousIteration(iter, conf, outputDir);
    }

    summarizeResults(maxIters, conf, outputDir);

    return 0;
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemIntegrationTest.java
License:Open Source License
/**
 * Validates that we correctly build our Options object from a Hadoop config.
 */
@Test
public void testBuildOptionsFromConfig() throws IOException {
    GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem();
    Configuration config = loadConfig("projectId", "serviceAccount", "priveKeyFile");

    GoogleCloudStorageFileSystemOptions.Builder optionsBuilder = fs.createOptionsBuilderFromConfig(config);
    GoogleCloudStorageFileSystemOptions options = optionsBuilder.build();
    GoogleCloudStorageOptions gcsOptions = options.getCloudStorageOptions();

    Assert.assertTrue(gcsOptions.isAutoRepairImplicitDirectoriesEnabled());
    Assert.assertFalse(gcsOptions.isInferImplicitDirectoriesEnabled());

    // Flip the directory-handling flags and override the metadata cache ages (in milliseconds).
    config.setBoolean(GoogleHadoopFileSystemBase.GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_KEY, false);
    config.setBoolean(GoogleHadoopFileSystemBase.GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_KEY, true);
    config.setLong(GoogleHadoopFileSystemBase.GCS_METADATA_CACHE_MAX_ENTRY_AGE_KEY, 2222L);
    config.setLong(GoogleHadoopFileSystemBase.GCS_METADATA_CACHE_MAX_INFO_AGE_KEY, 1111L);

    optionsBuilder = fs.createOptionsBuilderFromConfig(config);
    options = optionsBuilder.build();
    Assert.assertEquals(2222L, options.getCacheMaxEntryAgeMillis());
    Assert.assertEquals(1111L, options.getCacheMaxInfoAgeMillis());

    gcsOptions = options.getCloudStorageOptions();
    Assert.assertFalse(gcsOptions.isAutoRepairImplicitDirectoriesEnabled());
    Assert.assertTrue(gcsOptions.isInferImplicitDirectoriesEnabled());
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemIntegrationTest.java
License:Open Source License
/**
 * Validates success path in initialize().
 */
@Test
@Override
public void testInitializeSuccess() throws IOException, URISyntaxException {
    GoogleHadoopFileSystem fs = null;

    // Reuse loadConfig() to initialize auth related settings.
    Configuration config = loadConfig();

    // Set up remaining settings to known test values.
    int bufferSize = 512;
    config.setInt(GoogleHadoopFileSystemBase.BUFFERSIZE_KEY, bufferSize);
    long blockSize = 1024;
    config.setLong(GoogleHadoopFileSystemBase.BLOCK_SIZE_KEY, blockSize);
    String systemBucketName = ghfsHelper.getUniqueBucketName("-system-bucket");
    String rootBucketName = ghfsHelper.getUniqueBucketName("-root-bucket");
    config.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, systemBucketName);

    URI initUri = (new Path("gs://" + rootBucketName)).toUri();
    try {
        fs = new GoogleHadoopFileSystem();
        fs.initialize(initUri, config);
    } catch (IOException e) {
        Assert.fail("Unexpected exception");
    }

    // Verify that config settings were set correctly.
    Assert.assertEquals(bufferSize, fs.getBufferSizeOverride());
    Assert.assertEquals(blockSize, fs.getDefaultBlockSize());
    Assert.assertEquals(systemBucketName, fs.getSystemBucketName());
    Assert.assertEquals(initUri, fs.initUri);
    Assert.assertEquals(rootBucketName, fs.getRootBucketName());

    initUri = (new Path("gs:/foo")).toUri();
    try {
        fs = new GoogleHadoopFileSystem();
        fs.initialize(initUri, config);
    } catch (IOException e) {
        Assert.fail("Unexpected exception");
    }

    // Verify that config settings were set correctly.
    Assert.assertEquals(bufferSize, fs.getBufferSizeOverride());
    Assert.assertEquals(blockSize, fs.getDefaultBlockSize());
    Assert.assertEquals(systemBucketName, fs.getSystemBucketName());
    Assert.assertEquals(initUri, fs.initUri);
    Assert.assertEquals(systemBucketName, fs.getRootBucketName());
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystemIntegrationTest.java
License:Open Source License
/**
 * Validates success path in initialize().
 */
@Test
@Override
public void testInitializeSuccess() throws IOException, URISyntaxException {
    GoogleHadoopFileSystemBase fs = null;

    // Reuse loadConfig() to initialize auth related settings.
    Configuration config = loadConfig();

    // Set up remaining settings to known test values.
    int bufferSize = 512;
    config.setInt(GoogleHadoopFileSystemBase.BUFFERSIZE_KEY, bufferSize);
    long blockSize = 1024;
    config.setLong(GoogleHadoopFileSystemBase.BLOCK_SIZE_KEY, blockSize);
    String systemBucketName = ghfsHelper.getUniqueBucketName("-system-bucket");
    config.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, systemBucketName);

    URI initUri = (new Path("gsg://bucket-should-be-ignored")).toUri();
    try {
        fs = new GoogleHadoopGlobalRootedFileSystem();
        fs.initialize(initUri, config);
    } catch (IOException e) {
        Assert.fail("Unexpected exception");
    }

    // Verify that config settings were set correctly.
    Assert.assertEquals(bufferSize, fs.getBufferSizeOverride());
    Assert.assertEquals(blockSize, fs.getDefaultBlockSize());
    Assert.assertEquals(systemBucketName, fs.getSystemBucketName());
    Assert.assertEquals(initUri, fs.initUri);
}
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    // reducing block size to force a split of the tiny file
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    // verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();
            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}
From source file:com.hdfs.concat.clean.TestClean.java
License:Apache License
@Test
public void testAge() throws Exception {
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(new Path(ROOT_DIR, "a"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "a"), "1"));
    fs.mkdirs(new Path(ROOT_DIR, "b"));
    fs.mkdirs(new Path(ROOT_DIR, "c"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "c"), "2"));

    Path oldFile = new Path(new Path(new Path(ROOT_DIR, "a"), "1"), "oldfile");
    FSDataOutputStream out = fs.create(oldFile);
    out.write("bla".getBytes());
    out.close();

    Path cFile = new Path(new Path(new Path(ROOT_DIR, "c"), "1"), "cfile");
    FSDataOutputStream out2 = fs.create(cFile);
    out2.write("wah".getBytes());
    out2.close();

    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // Warn mode: matching files are reported but not deleted.
    Clean cleanWarn = new Clean();
    Configuration warnConf = createJobConf();
    warnConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    warnConf.set(Clean.TARGET_EXPR, "cfile");
    warnConf.set(Clean.WARN_MODE, "true");
    ToolRunner.run(warnConf, cleanWarn, new String[] {});
    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // Regular mode: files matching the expression are deleted.
    Clean cleanReg = new Clean();
    Configuration regConf = createJobConf();
    regConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    regConf.set(Clean.TARGET_EXPR, "cfile");
    ToolRunner.run(regConf, cleanReg, new String[] {});
    assertEquals(false, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // A large age cutoff keeps the recently written file.
    Clean clean = new Clean();
    Configuration cleanConf = createJobConf();
    cleanConf.setLong(Clean.CUTTOFF_MILLIS, 20000);
    cleanConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf, clean, new String[] {});
    assertEquals(true, fs.exists(oldFile));

    Thread.sleep(3);

    // A tiny cutoff makes the same file old enough to be removed.
    Clean clean2 = new Clean();
    Configuration cleanConf2 = createJobConf();
    cleanConf2.setLong(Clean.CUTTOFF_MILLIS, 1);
    cleanConf2.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf2, clean2, new String[] {});
    assertEquals(false, fs.exists(oldFile));
}
From source file:com.hdfs.concat.clean.TestClean.java
License:Apache License
@Test
public void testNegatives() throws Exception {
    Clean clean = new Clean();
    Configuration cleanConf = createJobConf();
    cleanConf.setLong(Clean.CUTTOFF_MILLIS, 20000);
    cleanConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    cleanConf.set(Clean.TARGET_EXPR, "bla");
    int res = ToolRunner.run(cleanConf, clean, new String[] {});
    assertEquals(9, res);
}
From source file:com.inclouds.hbase.utils.ConfigHelper.java
License:Open Source License
/**
 * Creates default Configuration with no persistence,
 * no compression, but with Global and Cache max memory set.
 *
 * @param maxGlobalSize the max global size
 * @param maxSize the max size
 * @return the default config
 */
public static Configuration getDefaultConfig(long maxGlobalSize, long maxSize) {
    Configuration cfg = new Configuration();
    cfg.setLong(CacheConfiguration.MAX_GLOBAL_MEMORY, maxGlobalSize);
    cfg.setLong(CacheConfiguration.MAX_MEMORY, maxSize);
    return cfg;
}
From source file:com.inclouds.hbase.utils.ConfigHelper.java
License:Open Source License
/**
 * Creates default Configuration with persistence and compression,
 * and with Global and Cache max memory set.
 *
 * @param maxGlobalSize the max global size
 * @param maxSize the max size
 * @param mode the mode
 * @param codec the codec
 * @return the default config
 */
public static Configuration getDefaultConfigMore(long maxGlobalSize, long maxSize, PersistenceMode mode,
        CodecType codec) {
    Configuration cfg = new Configuration();
    cfg.setLong(CacheConfiguration.MAX_GLOBAL_MEMORY, maxGlobalSize);
    cfg.setLong(CacheConfiguration.MAX_MEMORY, maxSize);
    cfg.set(CacheConfiguration.COMPRESSION, codec.toString());
    //cfg.set(name, value);
    return cfg;
}
From source file:com.inmobi.conduit.distcp.ConduitDistCp.java
License:Apache License
@Override
protected Path createInputFileListing(Job job) throws IOException {

    // get the file path where copy listing file has to be saved
    Path fileListingPath = getFileListingPath();
    Configuration config = job.getConfiguration();

    SequenceFile.Writer fileListWriter = null;
    try {
        fileListWriter = SequenceFile.createWriter(fileListingPath.getFileSystem(config), config,
                fileListingPath, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE);

        for (Map.Entry<String, FileStatus> entry : fileListingMap.entrySet()) {
            FileStatus status = FileUtil.getFileStatus(entry.getValue(), buffer, in);
            fileListWriter.append(new Text(entry.getKey()), status);

            // Create a sync point after each entry. This will ensure that SequenceFile
            // Reader can work at file entry level granularity, given that SequenceFile
            // Reader reads from the starting of sync point.
            fileListWriter.sync();

            totalBytesToCopy += entry.getValue().getLen();
            totalPaths++;
        }
    } finally {
        if (fileListWriter != null) {
            fileListWriter.close();
        }
    }

    LOG.info("Number of paths considered for copy: " + totalPaths);
    LOG.info("Number of bytes considered for copy: " + totalBytesToCopy
            + " (Actual number of bytes copied depends on whether any files are "
            + "skipped or overwritten.)");

    // set distcp configurations
    config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, fileListingPath.toString());
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalBytesToCopy);
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, totalPaths);
    return fileListingPath;
}