List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
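Before the project examples below, here is a minimal, self-contained sketch of the call. The path /tmp/example-output and the plain local Configuration are illustrative assumptions, not taken from any of the projects listed:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical target path; replace with the directory you actually want to remove.
        Path target = new Path("/tmp/example-output");
        // recursive = true removes a non-empty directory and everything under it;
        // the call returns true if the path existed and was deleted.
        boolean deleted = fs.delete(target, true);
        System.out.println("deleted = " + deleted);
        fs.close();
    }
}

delete(Path, boolean) returns false rather than throwing when the path does not exist, which is why many of the examples below call it unconditionally to clear an old output directory before running a job.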
From source file:com.hadoop.hdfs_test.HdfsWriter.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsWriter [fileSize ie. 1g/10g/40g]");
        return 1;
    }

    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);
    }

    String localFolder = "/home/hduser/projects/hdfs_test/input/";
    String hdfsFolder = "/hdfs_test/";
    int numFiles = 5;
    short replication = 1;
    String hdfsFile;
    long startTime, endTime, duration = 0;
    long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;

    String fileName = args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    boolean overWrite = true;
    int bufferSize[] = new int[] { 4096, 16384, 65536, 262144 };
    long blockSize[] = new long[] { 67108864, 134217728, 268435456 };

    Configuration conf = getConf();
    System.out.println("configured filesystem = " + conf.get(FS_PARAM_NAME));
    FileSystem fs = FileSystem.get(conf);

    for (int i = 0; i < 4; i++) { // different buffer size
        for (int j = 0; j < 3; j++) { // different block size
            double numIters = fileSize / (double) bufferSize[i];
            byte[] buf = new byte[bufferSize[i]];
            for (int m = 0; m < bufferSize[i]; m += 4) {
                buf[m] = (byte) m;
            }

            for (int k = 1; k <= numFiles; k++) {
                hdfsFile = hdfsFolder + args[0] + "/" + i + ".in";
                Path outputPath = new Path(hdfsFile);
                OutputStream os = fs.create(outputPath, overWrite, bufferSize[i], replication, blockSize[j]);
                startTime = System.currentTimeMillis();
                for (long m = 0; m < numIters; m++) {
                    os.write(buf);
                }
                endTime = System.currentTimeMillis();
                os.close();
                fs.delete(outputPath, true);

                duration = endTime - startTime;
                avg += duration;
                if (duration < min) {
                    min = duration;
                }
                if (duration > max) {
                    max = duration;
                }
            }

            // write result to output
            Double avgBW = fileSizeInMB * 1000.0 * (double) numFiles / (double) avg;
            avgPW.print(avgBW);
            avgPW.print("\t");
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            minPW.print(minBW);
            minPW.print("\t");
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
            maxPW.print(maxBW);
            maxPW.print("\t");

            duration = 0;
            avg = 0;
            min = Long.MAX_VALUE;
            max = Long.MIN_VALUE;
        }
        avgPW.println();
        minPW.println();
        maxPW.println();
    }

    avgPW.close();
    minPW.close();
    maxPW.close();
    return 0;
}
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    // reducing block size to force a split of the tiny file
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    // verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);
        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();
            md5.update(value.getBytes(), 0, value.getLength());
        }
        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}
From source file:com.hadoopilluminated.examples.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;

    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentonimo.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}
From source file:com.hdfs.concat.clean.TestClean.java
License:Apache License
public void setUp() throws Exception {
    super.setUp();
    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);
}
From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapred.output.compress", true);
    job.set("mapred.output.compression.type", CompressionType.BLOCK.name());
    job.set("mapred.output.compression.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));
    homeDir = homeDirPath.toUri().getPath();
    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}
From source file:com.hhscyber.nl.tweets.gencsv.GenCsv.java
/**
 * @param args the command line arguments
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException, Exception {
    Conf conf = new Conf(args, "");
    FileSystem hdfs = FileSystem.get(conf);
    conf.set("outputpath", "location");

    Job job = new Job(conf, "GenerateCsv");
    job.setJarByClass(GenCsv.class);

    String stop = "634628247817359360"; // 1000 tweets? add 1 row
    Scan scan = new Scan();
    //scan.setStopRow(stop.getBytes());
    job.setSpeculativeExecution(false);

    FilterList filterList = new FilterList();
    SingleColumnValueFilter filterCity = new SingleColumnValueFilter(
            hbasehelper.HbaseHelper.getPutBytesSafe("content"),
            hbasehelper.HbaseHelper.getPutBytesSafe("location_city"),
            CompareFilter.CompareOp.NOT_EQUAL,
            hbasehelper.HbaseHelper.getPutBytesSafe(""));
    SingleColumnValueFilter filterKnown = new SingleColumnValueFilter(
            hbasehelper.HbaseHelper.getPutBytesSafe("content"),
            hbasehelper.HbaseHelper.getPutBytesSafe("location_known"),
            CompareFilter.CompareOp.EQUAL,
            hbasehelper.HbaseHelper.getPutBytesSafe("true"));
    filterList.addFilter(filterCity);
    filterList.addFilter(filterKnown);
    scan.setFilter(filterList);

    TableMapReduceUtil.initTableMapperJob("hhscyber:tweets_final", scan, GenCsvMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setReducerClass(GenCsvReducer.class);

    hdfs.delete(new Path("location"), true);
    job.waitForCompletion(true);
}
From source file:com.hhscyber.nl.tweets.svm.train.Train.java
/**
 * @param args the command line arguments
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {
    Conf conf = new Conf(args, "");
    FileSystem hdfs = FileSystem.get(conf);
    hdfs.delete(new Path("trainer"), true);

    Job client = new HBJob(conf, "SVMTrainer");
    client.setJarByClass(Train.class);
    client.setMapOutputKeyClass(Text.class);
    client.setMapOutputValueClass(Text.class);

    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("svmclass"));

    client.setNumReduceTasks(1);
    client.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(client, new Path("trainer"));

    client.setMapperClass(TrainMapper.class);
    client.setReducerClass(TrainReducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
    }
}
From source file:com.hive_unit.HiveTestBase.java
License:Apache License
public void setUp() throws Exception {
    super.setUp();
    String jarFile = org.apache.hadoop.hive.ql.exec.CopyTask.class.getProtectionDomain().getCodeSource()
            .getLocation().getFile();
    System.setProperty(HiveConf.ConfVars.HIVEJAR.toString(), jarFile);

    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);

    Path metastorePath = new Path("/tmp/metastore_db");
    fs.delete(metastorePath, true);

    Path warehouse = new Path("/tmp/warehouse");
    fs.delete(warehouse, true);
    fs.mkdirs(warehouse);
}
From source file:com.hive_unit.HiveTestService.java
License:Apache License
public void setUp() throws Exception {
    super.setUp();
    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);

    Path metastorePath = new Path("/tmp/metastore_db");
    fs.delete(metastorePath, true);

    Path warehouse = new Path("/tmp/warehouse");
    fs.delete(warehouse, true);
    fs.mkdirs(warehouse);

    if (standAloneServer) {
        try {
            transport = new TSocket(host, port);
            TProtocol protocol = new TBinaryProtocol(transport);
            client = new HiveClient(protocol);
            transport.open();
        } catch (Throwable e) {
            e.printStackTrace();
        }
    } else {
        client = new HiveServer.HiveServerHandler();
    }
}
From source file:com.hotels.plunger.TapDataWriter.java
License:Apache License
private void deleteTemporaryPath(Path outputPath, FileSystem fileSystem) throws IOException {
    if (fileSystem.exists(outputPath)) {
        Path tmpDir = new Path(outputPath, Hadoop18TapUtil.TEMPORARY_PATH);
        if (fileSystem.exists(tmpDir)) {
            fileSystem.delete(tmpDir, true);
        }
    }
}