List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit
deleteOnExit marks a path to be deleted when the FileSystem is closed; for cached FileSystem instances this happens automatically at JVM shutdown.
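The project examples below all follow the same pattern: obtain a FileSystem, mark a scratch path with deleteOnExit, and let closing the FileSystem (or JVM shutdown) remove it. As a minimal, self-contained sketch of that pattern (the scratch path and file name here are hypothetical, and the code assumes the default Configuration resolves to a reachable local or HDFS filesystem):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Hypothetical scratch location; adjust to your cluster or local filesystem.
        Path scratch = new Path("/tmp/deleteOnExit-demo-" + System.currentTimeMillis());

        FileSystem fs = scratch.getFileSystem(conf);
        fs.mkdirs(scratch);

        // Mark the path for deletion when this FileSystem is closed
        // (for cached instances, this happens at JVM shutdown).
        fs.deleteOnExit(scratch);

        // Write something into the scratch directory.
        try (FSDataOutputStream out = fs.create(new Path(scratch, "data.txt"))) {
            out.writeUTF("temporary data");
        }

        // Closing the FileSystem triggers deletion of the marked path.
        fs.close();
    }
}

Note that deleteOnExit does not delete the path immediately; deletion happens when that FileSystem instance is closed, which for cached instances normally occurs at JVM shutdown.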
From source file:com.blackberry.logtools.LogTools.java
License:Apache License
public void tmpDirHDFS(boolean quiet, boolean silent, FileSystem fs, Configuration conf, String tmp, boolean log) {
    logConsole(quiet, silent, info, "Creating new Temp Directory in HDFS: " + tmp);

    try {
        Path path = new Path(tmp);
        if (!(fs.exists(path))) {
            // Create directory
            fs.mkdirs(path);
            if (log != true) {
                fs.deleteOnExit(path);
            }
        }
    } catch (IOException e) {
        if (e.toString().contains("Failed to find any Kerberos")) {
            logConsole(true, true, error, "No/bad Kerberos ticket - please authenticate.");
            System.exit(1);
        } else if (e.toString().contains("quota") && e.toString().contains("exceeded")) {
            logConsole(true, true, error, "Disk quota Exceeded.");
            System.exit(1);
        }
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License:Apache License
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning
 * against <code>splitPoints</code>. Cleans up the partitions file after the
 * job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {
    // create the partitions file
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
    fs.makeQualified(partitionsPath);
    fs.deleteOnExit(partitionsPath);
    writePartitions(job.getConfiguration(), partitionsPath, splitPoints);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
}
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/**
 * @param fs
 *            File system where to upload the jar.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where to upload the jar.
 * @param remoteMd5Path
 *            The remote path where to create the MD5 file.
 *
 * @throws IOException
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath, Path remoteMd5Path)
        throws IOException {
    logger.info("Copying {} to {}", localJarPath.toUri().toASCIIString(), remoteJarPath.toUri().toASCIIString());
    fs.copyFromLocalFile(localJarPath, remoteJarPath);
    // create the MD5 file for this jar.
    createMd5SumFile(fs, md5sum, remoteMd5Path);

    // we need to clean the tmp files that are created by JarFinder after the JVM exits.
    if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteJarPath);
    }
    // same for the MD5 file.
    if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteMd5Path);
    }
}
From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java
License:Apache License
@Test
public void testDirectWrite() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();

    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);

    String STRING = "Hello World";

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    dos.writeUTF(STRING);
    dos.close();

    // reading
    FSDataInputStream dis = hdfs.open(path);
    String s = dis.readUTF();
    System.out.println(s);
    assertEquals(STRING, s);

    dis.close();
    hdfs.close();
}
From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java
License:Apache License
@Test
public void testHDFSSequenceFileWrite() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();

    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);

    Event e = new EventImpl("EVENT".getBytes());
    Writer w = SequenceFile.createWriter(hdfs, conf, path, WriteableEventKey.class, WriteableEvent.class);

    // writing
    w.append(new WriteableEventKey(e), new WriteableEvent(e));
    w.close();

    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0);

    // reading
    SequenceFile.Reader r = new SequenceFile.Reader(hdfs, path, conf);
    WriteableEventKey k = new WriteableEventKey();
    WriteableEvent evt = new WriteableEvent();
    r.next(k, evt);

    assertEquals(evt.getTimestamp(), e.getTimestamp());
    assertEquals(evt.getNanos(), e.getNanos());
    assertEquals(evt.getPriority(), e.getPriority());
    assertTrue(Arrays.equals(evt.getBody(), e.getBody()));

    hdfs.close();
}
From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java
License:Apache License
@Test
public void testWhyFail() throws IOException {
    // There was a failure case using:
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    hdfs.deleteOnExit(path);

    // this version's Writer has ownOutputStream=false.
    Writer writer = SequenceFile.createWriter(conf, dos, WriteableEventKey.class, WriteableEvent.class,
            SequenceFile.CompressionType.NONE, new DefaultCodec());

    Event e = new EventImpl("EVENT".getBytes());
    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
    writer.sync();
    writer.close();

    dos.close(); // It is strange that I have to close the underlying FSDataOutputStream.

    // WTF: nothing written by this writer!
    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0); // it should have written something but it failed.
}
From source file:com.cloudera.flume.PerfHdfsIO.java
License:Apache License
@Test
public void testCopy() throws IOException, InterruptedException {

    Benchmark b = new Benchmark("hdfs seqfile copy");
    b.mark("begin");

    MemorySinkSource mem = FlumeBenchmarkHarness.synthInMem();
    b.mark("disk_loaded");

    File tmp = File.createTempFile("test", "tmp");
    tmp.deleteOnExit();
    SeqfileEventSink sink = new SeqfileEventSink(tmp);
    sink.open();
    b.mark("localdisk_write_started");

    EventUtil.dumpAll(mem, sink);
    b.mark("local_disk_write done");

    sink.close();

    FlumeConfiguration conf = FlumeConfiguration.get();
    Path src = new Path(tmp.getAbsolutePath());
    Path dst = new Path("hdfs://localhost/testfile");
    FileSystem hdfs = dst.getFileSystem(conf);
    hdfs.deleteOnExit(dst);

    b.mark("hdfs_copy_started");
    hdfs.copyFromLocalFile(src, dst);
    b.mark("hdfs_copy_done");
    hdfs.close();
    b.done();
}
From source file:com.cloudera.flume.PerfHdfsIO.java
License:Apache License
@Test
public void testDirectWrite() throws IOException, InterruptedException {

    Benchmark b = new Benchmark("hdfs seqfile write");
    b.mark("begin");

    MemorySinkSource mem = FlumeBenchmarkHarness.synthInMem();
    b.mark("disk_loaded");

    FlumeConfiguration conf = FlumeConfiguration.get();

    Path path = new Path("hdfs://localhost/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);
    Writer w = SequenceFile.createWriter(hdfs, conf, path, WriteableEventKey.class, WriteableEvent.class);

    b.mark("hdfs_fileopen_started");

    Event e = null;
    while ((e = mem.next()) != null) {
        // writing
        w.append(new WriteableEventKey(e), new WriteableEvent(e));
    }
    w.close();
    b.mark("seqfile_hdfs_write");

    hdfs.close();
    b.done();
}
From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java
License:Apache License
@DriverCleanup
public void cleanup() throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    fs.deleteOnExit(getWorkingDirectory());
}
From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java
License:Apache License
private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, getting samples from the data sources
        ////////////////////////////////////////////////

        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(
                    new FileOutputStream(new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            for (Object obj : samples) {
                pw.println(obj);
            }
            pw.flush();
            pw.close();
        }

        ////////////////////////////////////////////////
        // start to write partition files
        ////////////////////////////////////////////////

        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        float avgReduceSize = samples.length / reducersNbr;

        int lastBegin = 0;
        for (int i = 0; i < samples.length;) {
            // try to distribute the load evenly across reducers by dividing
            // the <code>samples</code> into a set of blocks separated by
            // boundaries (objects selected from the <code>samples</code>
            // array); each block should be about the same size.

            // find the last index of the element equal to samples[i], as the
            // element might appear multiple times in the samples.
            int upperBound = Util.findUpperBound(samples, samples[i], comparator);

            int lowerBound = i; // Util.findLowerBound(samples, samples[i], comparator);

            // the repeat count of samples[i]; if the key itself repeats too
            // often, select it as a boundary
            int currentElemSize = upperBound - lowerBound + 1;

            if (currentElemSize > avgReduceSize * 2) // greater than two times the average reducer size
            {
                // the current element is too big, greater than two times the
                // <code>avgReduceSize</code>, so select it as a boundary
                writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                //pw.println(samples[i]);

                // immediately put the next element on the boundary; the next
                // element starts at <code>upperBound+1</code>, to prevent the
                // current one from consuming even more.
                if (upperBound + 1 < samples.length) {
                    writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());
                    //pw.println(samples[upperBound+1]);

                    // move on to the next element after <code>samples[upperBound+1]</code>
                    lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                    i = lastBegin;
                } else {
                    break;
                }
            } else {
                // the current element is small enough to be considered part
                // of the previous group
                int size = upperBound - lastBegin;
                if (size > avgReduceSize) {
                    // by including the current elements, we have found a
                    // block that's big enough; select it as a boundary
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                    //pw.println(samples[i]);

                    i = upperBound + 1;
                    lastBegin = i;
                } else {
                    i = upperBound + 1;
                }
            }
        }

        writer.close();

        // if the number of written samples doesn't equal the number of
        // reducers minus one, the key space is too small and
        // TotalOrderPartitioner won't work; it works only if the partition
        // boundaries are distinct.
        //
        // we need to change the number of reducers
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // add 1 because the written samples define boundaries, e.g. if
            // the sample size is two with elements [300, 1000], then there
            // should be 3 reducers: one handling i<300, one handling
            // 300<=i<1000, and another handling 1000<=i
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}