Example usage for org.apache.hadoop.fs FileSystem deleteOnExit

List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem deleteOnExit.

Prototype

Set<Path> deleteOnExit

To view the source code for org.apache.hadoop.fs FileSystem deleteOnExit, click the Source link.

Document

A cache of files that should be deleted when the FileSystem is closed or the JVM exits. The method deleteOnExit(Path f), which the examples below call, adds a path to this cache.
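
Before the project examples below, here is a minimal sketch of the typical pattern, assuming a default Configuration and an illustrative scratch path: create a temporary path, mark it with deleteOnExit, and let close() (or JVM shutdown, which closes all FileSystem objects) remove it.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // illustrative scratch path; any writable location works
        Path scratch = new Path("/tmp/scratch-" + System.currentTimeMillis());
        fs.create(scratch).close(); // create an empty placeholder file

        // mark the path; returns true if the path exists and was registered
        fs.deleteOnExit(scratch);

        // ... use the scratch file ...

        // marked paths are deleted here when the FileSystem is closed
        fs.close();
    }
}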

Usage

From source file:com.blackberry.logtools.LogTools.java

License:Apache License

public void tmpDirHDFS(boolean quiet, boolean silent, FileSystem fs, Configuration conf, String tmp,
        boolean log) {
    logConsole(quiet, silent, info, "Creating new Temp Directory in HDFS: " + tmp);

    try {
        Path path = new Path(tmp);
        if (!fs.exists(path)) {
            // create the directory
            fs.mkdirs(path);
            if (!log) {
                fs.deleteOnExit(path);
            }
        }
        }
    } catch (IOException e) {
        if (e.toString().contains("Failed to find any Kerberos")) {
            logConsole(true, true, error, "No/bad Kerberos ticket - please authenticate.");
            System.exit(1);
        } else if (e.toString().contains("quota") && e.toString().contains("exceeded")) {
            logConsole(true, true, error, "Disk quota Exceeded.");
            System.exit(1);
        }
        e.printStackTrace();
        System.exit(1);
    }
}

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning
 * against <code>splitPoints</code>. Cleans up the partitions file after the
 * job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {

    // create the partitions file
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
    partitionsPath = fs.makeQualified(partitionsPath); // makeQualified returns a new Path; keep it
    fs.deleteOnExit(partitionsPath);
    writePartitions(job.getConfiguration(), partitionsPath, splitPoints);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
}

From source file:com.cloudera.cdk.tools.JobClasspathHelper.java

License:Apache License

/**
 * @param fs
 *            File system where to upload the jar.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where to upload the jar.
 * @param remoteMd5Path
 *            The remote path where to create the MD5 file.
 * 
 * @throws IOException
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath,
        Path remoteMd5Path) throws IOException {

    logger.info("Copying {} to {}", localJarPath.toUri().toASCIIString(),
            remoteJarPath.toUri().toASCIIString());
    fs.copyFromLocalFile(localJarPath, remoteJarPath);
    // create the MD5 file for this jar.
    createMd5SumFile(fs, md5sum, remoteMd5Path);

    // we need to clean up the tmp files that are created by JarFinder after the JVM exits.
    if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteJarPath);
    }
    // same for the MD5 file.
    if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteMd5Path);
    }
}

From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java

License:Apache License

@Test
public void testDirectWrite() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();

    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);

    String STRING = "Hello World";

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    dos.writeUTF(STRING);
    dos.close();

    // reading
    FSDataInputStream dis = hdfs.open(path);
    String s = dis.readUTF();
    System.out.println(s);
    assertEquals(STRING, s);

    dis.close();

    hdfs.close();
}

From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java

License:Apache License

@Test
public void testHDFSSequenceFileWrite() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();

    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);

    Event e = new EventImpl("EVENT".getBytes());

    Writer w = SequenceFile.createWriter(hdfs, conf, path, WriteableEventKey.class, WriteableEvent.class);

    // writing
    w.append(new WriteableEventKey(e), new WriteableEvent(e));
    w.close();

    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0);

    // reading
    SequenceFile.Reader r = new SequenceFile.Reader(hdfs, path, conf);
    WriteableEventKey k = new WriteableEventKey();
    WriteableEvent evt = new WriteableEvent();
    r.next(k, evt);

    assertEquals(evt.getTimestamp(), e.getTimestamp());
    assertEquals(evt.getNanos(), e.getNanos());
    assertEquals(evt.getPriority(), e.getPriority());
    assertTrue(Arrays.equals(evt.getBody(), e.getBody()));

    hdfs.close();
}

From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java

License:Apache License

@Test
public void testWhyFail() throws IOException {

    // There was a failure case using:
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    hdfs.deleteOnExit(path);

    // this version's Writer has ownOutputStream=false.
    Writer writer = SequenceFile.createWriter(conf, dos, WriteableEventKey.class, WriteableEvent.class,
            SequenceFile.CompressionType.NONE, new DefaultCodec());

    Event e = new EventImpl("EVENT".getBytes());

    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
    writer.sync();
    writer.close();

    dos.close(); // It is strange that I have to close the underlying
    // FSDataOutputStream.

    // WTF: nothing written by this writer!
    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0);
    // it should have written something but it failed.
}

From source file:com.cloudera.flume.PerfHdfsIO.java

License:Apache License

@Test
public void testCopy() throws IOException, InterruptedException {

    Benchmark b = new Benchmark("hdfs seqfile copy");
    b.mark("begin");

    MemorySinkSource mem = FlumeBenchmarkHarness.synthInMem();
    b.mark("disk_loaded");

    File tmp = File.createTempFile("test", "tmp");
    tmp.deleteOnExit();
    SeqfileEventSink sink = new SeqfileEventSink(tmp);
    sink.open();
    b.mark("localdisk_write_started");

    EventUtil.dumpAll(mem, sink);

    b.mark("local_disk_write done");

    sink.close();

    FlumeConfiguration conf = FlumeConfiguration.get();
    Path src = new Path(tmp.getAbsolutePath());
    Path dst = new Path("hdfs://localhost/testfile");
    FileSystem hdfs = dst.getFileSystem(conf);
    hdfs.deleteOnExit(dst);

    b.mark("hdfs_copy_started");
    hdfs.copyFromLocalFile(src, dst);
    b.mark("hdfs_copy_done");
    hdfs.close();
    b.done();
}

From source file:com.cloudera.flume.PerfHdfsIO.java

License:Apache License

@Test
public void testDirectWrite() throws IOException, InterruptedException {

    Benchmark b = new Benchmark("hdfs seqfile write");
    b.mark("begin");

    MemorySinkSource mem = FlumeBenchmarkHarness.synthInMem();
    b.mark("disk_loaded");

    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("hdfs://localhost/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);

    Writer w = SequenceFile.createWriter(hdfs, conf, path, WriteableEventKey.class, WriteableEvent.class);
    b.mark("hdfs_fileopen_started");

    Event e = null;
    while ((e = mem.next()) != null) {
        // writing
        w.append(new WriteableEventKey(e), new WriteableEvent(e));
    }
    w.close();
    b.mark("seqfile_hdfs_write");

    hdfs.close();
    b.done();
}

From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java

License:Apache License

@DriverCleanup
public void cleanup() throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    fs.deleteOnExit(getWorkingDirectory());
}

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, getting samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(
                    new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            for (Object obj : samples) {
                pw.println(obj);
            }
            pw.flush();
            pw.close();
        }

        ////////////////////////////////////////////////
        // start to write partition files
        ////////////////////////////////////////////////

        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        float avgReduceSize = (float) samples.length / reducersNbr; // cast first; integer division would truncate

        int lastBegin = 0;
        for (int i = 0; i < samples.length;) {
            // try to distribute the load evenly across reducers by
            // dividing the <code>samples</code> into a set of blocks
            // separated by boundaries (objects selected from the
            // <code>samples</code> array); each block should have
            // about the same size.

            // find the last index of the element equal to samples[i], as
            // that element might appear multiple times in the samples.
            int upperBound = Util.findUpperBound(samples, samples[i], comparator);

            int lowerBound = i;//Util.findLowerBound(samples, samples[i], comparator);

            // the repeat count of samples[i]; if the key itself repeats
            // too often, select it as a boundary
            int currentElemSize = upperBound - lowerBound + 1;

            if (currentElemSize > avgReduceSize * 2) // more than twice the average reducer size
            {
                // the current element is too big, more than twice the
                // <code>avgReduceSize</code>, so select it as a
                // boundary on its own
                writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                //pw.println(samples[i]);

                // immediately make the next element a boundary as well
                // (it starts at <code>upperBound + 1</code>) to prevent
                // the current one from consuming even more.
                if (upperBound + 1 < samples.length) {
                    writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());
                    //pw.println(samples[upperBound+1]);

                    // move on to the element after <code>samples[upperBound + 1]</code>
                    lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                    i = lastBegin;
                } else {
                    break;
                }
            } else {
                // the current element is small enough to be considered
                // part of the previous group
                int size = upperBound - lastBegin;
                if (size > avgReduceSize) {
                    // by including the current element, we have
                    // found a block that's big enough; select it
                    // as a boundary
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                    //pw.println(samples[i]);

                    i = upperBound + 1;
                    lastBegin = i;
                } else {
                    i = upperBound + 1;
                }
            }
        }

        writer.close();

        // if the number of written samples doesn't equal the number of
        // reducers minus one, the key space is too small and
        // TotalOrderPartitioner won't work; it works only if the
        // partition boundaries are distinct.
        //
        // we need to change the number of reducers
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // add 1 because the written samples define boundaries; e.g.,
            // if the sample size is two with elements [300, 1000], then
            // there should be 3 reducers: one handling i < 300, one for
            // 300 <= i < 1000, and another for 1000 <= i
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}