Example usage for org.apache.hadoop.fs FileSystem deleteOnExit

List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit

Introduction

This page presents example usages of org.apache.hadoop.fs FileSystem deleteOnExit.

Prototype

public boolean deleteOnExit(Path f) throws IOException

Document

Marks a path to be deleted when the FileSystem is closed, or when the JVM exits while the FileSystem is still open.
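
As a minimal, self-contained sketch of typical use (the paths and class name here are illustrative, not taken from the examples below): create a working directory, mark it with deleteOnExit, and let the FileSystem remove it when it is closed or when the JVM shuts down.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Illustrative scratch directory; any writable path works.
        Path scratch = new Path("/tmp/delete-on-exit-demo-" + System.currentTimeMillis());
        FileSystem fs = FileSystem.get(scratch.toUri(), conf);
        fs.mkdirs(scratch);
        // Mark the path so it is deleted when this FileSystem is closed
        // (cached FileSystem instances are closed automatically at JVM shutdown).
        boolean marked = fs.deleteOnExit(scratch);
        System.out.println("Marked for deletion on exit: " + marked);
        fs.close(); // closing the FileSystem deletes the marked path
    }
}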

Usage

From source file:org.qcri.algebra.MultiplicationTest.java

License:Apache License

@Before
public void setup() throws Exception {
    NMFCommon.DEFAULT_REDUCESPLOTS = 2;
    dot(inputVectorsA, inputVectorsA2, dotVectors);
    composite(inputVectorsA, inputVectorsA2, inputVectorsAsquare, compositeVectors);
    product(inputVectorsA, inputVectorsB, productVectors);
    ata(inputVectorsA, ataVectors);
    conf = new Configuration();
    conf.set("mapreduce.job.tracker", "local");
    conf.set("fs.default.name", "file:///");
    long currTime = System.currentTimeMillis();
    Path testbed = new Path("/tmp/" + currTime);
    output = new Path(testbed, "output");
    tmp = new Path(testbed, "tmp");
    FileSystem fs;
    try {
        fs = FileSystem.get(output.toUri(), conf);
        fs.mkdirs(output);
        fs.mkdirs(tmp);
        fs.deleteOnExit(testbed);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + output);
        return;
    }
    aTranspose = transpose(inputVectorsA);
    aDensePath = AlgebraCommon.toDenseMapDir(inputVectorsA, tmp, tmp, "matrixADense").getRowPath();
    aSquareDensePath = AlgebraCommon.toDenseMapDir(inputVectorsAsquare, tmp, tmp, "matrixASqaureDense")
            .getRowPath();
    a2DensePath = AlgebraCommon.toDenseMapDir(inputVectorsA2, tmp, tmp, "matrixA2Dense").getRowPath();
    bDensePath = AlgebraCommon.toDenseMapDir(inputVectorsB, tmp, tmp, "matrixBDense").getRowPath();
    atDensePath = AlgebraCommon.toDenseMapDir(aTranspose, tmp, tmp, "matrixAtDense").getRowPath();
    aSparsePath = AlgebraCommon.toSparseMapDir(inputVectorsA, tmp, tmp, "matrixASparse").getRowPath();
    aSquareSparsePath = AlgebraCommon.toSparseMapDir(inputVectorsAsquare, tmp, tmp, "matrixASqaureSparse")
            .getRowPath();
    a2SparsePath = AlgebraCommon.toSparseMapDir(inputVectorsA2, tmp, tmp, "matrixA2Sparse").getRowPath();
    bSparsePath = AlgebraCommon.toSparseMapDir(inputVectorsB, tmp, tmp, "matrixBSparse").getRowPath();
    atSparsePath = AlgebraCommon.toSparseMapDir(aTranspose, tmp, tmp, "matrixAtSparse").getRowPath();
}

From source file:org.qcri.pca.CompositeJobTest.java

License:Apache License

@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path outputDir = new Path("/tmp/" + currTime);
    FileSystem fs;
    try {
        fs = FileSystem.get(outputDir.toUri(), conf);
        fs.mkdirs(outputDir);
        fs.deleteOnExit(outputDir);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + outputDir);
        return;
    }
    ym = computeMean(inputVectors);
    double[] xm = new double[xsize];
    times(ym, inMemMatrix, xm);
    ymPath = PCACommon.toDistributedVector(new DenseVector(ym), outputDir, "ym", conf);
    xmPath = PCACommon.toDistributedVector(new DenseVector(xm), outputDir, "xm", conf);
    DistributedRowMatrix distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(inMemMatrix), outputDir,
            outputDir, "inMemMatrix");
    inMemMatrixPath = distMatrix.getRowPath();
    for (double[] row : xtx)
        for (int c = 0; c < row.length; c++)
            row[c] = 0;
    for (double[] row : ytx)
        for (int c = 0; c < row.length; c++)
            row[c] = 0;
    computeXtXandYtX(inputVectors);
}

From source file:org.qcri.pca.Norm2JobTest.java

License:Apache License

@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path meanSpanDirPath = new Path("/tmp/" + currTime + "/meanSpan");
    meanSpanFilePath = new MeanAndSpanJob().getMeanSpanPath(meanSpanDirPath);
    FileSystem fs;
    try {
        fs = FileSystem.get(meanSpanDirPath.toUri(), conf);
        fs.mkdirs(meanSpanDirPath);
        fs.deleteOnExit(meanSpanDirPath);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating meanSpan direcoty " + meanSpanDirPath);
        return;
    }
    prepareTheMeanSpanFile(fs);
}

From source file:org.qcri.pca.PCATest.java

License:Apache License

@Before
public void setup() {
    ppcaDriver = new SPCADriver() {
        public Path getTempPath() {
            return tmp;
        }
    };
    N = 527;
    D = 38;
    d = 8;
    conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");
    URL inputURL = this.getClass().getResource("/input.water");
    input = new Path(inputURL.toString());
    long currTime = System.currentTimeMillis();
    output = new Path("/tmp/" + currTime + "/output");
    tmp = new Path("/tmp/" + currTime + "/tmp");
    FileSystem fs;
    try {
        fs = FileSystem.get(output.toUri(), conf);
        fs.mkdirs(output);
        fs.mkdirs(tmp);
        fs.deleteOnExit(output);
        fs.deleteOnExit(tmp);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + output);
        return;
    }
}

From source file:org.qcri.pca.ReconstructionErrJobTest.java

License:Apache License

@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path outputDir = new Path("/tmp/" + currTime);
    FileSystem fs;
    try {
        fs = FileSystem.get(outputDir.toUri(), conf);
        fs.mkdirs(outputDir);
        fs.deleteOnExit(outputDir);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + outputDir);
        return;
    }
    ym = computeMean(inputVectors);
    double[] xm = new double[xsize];
    times(ym, y2xVectors, xm);
    double[] zm = new double[cols];
    timesTranspose(xm, cVectors, zm);
    for (int c = 0; c < cols; c++)
        zm[c] -= ym[c];
    ymPath = PCACommon.toDistributedVector(new DenseVector(ym), outputDir, "ym", conf);
    zmPath = PCACommon.toDistributedVector(new DenseVector(zm), outputDir, "zm", conf);
    DistributedRowMatrix distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(y2xVectors), outputDir,
            outputDir, "y2xMatrix");
    y2xMatrixPath = distMatrix.getRowPath();
    distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(cVectors), outputDir, outputDir, "cMatrix");
    cMatrixPath = distMatrix.getRowPath();
    computeError(inputVectors);
}

From source file:org.qcri.pca.VarianceJobTest.java

License:Apache License

@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path outputDir = new Path("/tmp/" + currTime);
    FileSystem fs;
    try {
        fs = FileSystem.get(outputDir.toUri(), conf);
        fs.mkdirs(outputDir);
        fs.deleteOnExit(outputDir);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + outputDir);
        return;
    }
    ym = computeMean(inputVectors);
    double[] xm = new double[xsize];
    times(ym, y2xVectors, xm);
    ymPath = PCACommon.toDistributedVector(new DenseVector(ym), outputDir, "ym", conf);
    xmPath = PCACommon.toDistributedVector(new DenseVector(xm), outputDir, "xm", conf);
    DistributedRowMatrix distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(y2xVectors), outputDir,
            outputDir, "y2xMatrix");
    y2xMatrixPath = distMatrix.getRowPath();
    distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(cVectors), outputDir, outputDir, "cMatrix");
    cMatrixPath = distMatrix.getRowPath();
}

From source file:org.terrier.utility.io.HadoopPlugin.java

License:Mozilla Public License

/** Initialises the Plugin, by connecting to the distributed file system */
public void initialise() throws Exception {
    config = getGlobalConfiguration();

    final org.apache.hadoop.fs.FileSystem DFS = hadoopFS = org.apache.hadoop.fs.FileSystem.get(config);

    FileSystem terrierDFS = new FileSystem() {
        public String name() {
            return "hdfs";
        }

        /** capabilities of the filesystem */
        public byte capabilities() {
            return FSCapability.READ | FSCapability.WRITE | FSCapability.RANDOM_READ | FSCapability.STAT
                    | FSCapability.DEL_ON_EXIT | FSCapability.LS_DIR;
        }

        public String[] schemes() {
            return new String[] { "dfs", "hdfs" };
        }

        /** returns true if the path exists */
        public boolean exists(String filename) throws IOException {
            if (logger.isDebugEnabled())
                logger.debug("Checking that " + filename + " exists answer=" + DFS.exists(new Path(filename)));
            return DFS.exists(new Path(filename));
        }

        /** open a file of given filename for reading */
        public InputStream openFileStream(String filename) throws IOException {
            if (logger.isDebugEnabled())
                logger.debug("Opening " + filename);
            return DFS.open(new Path(filename));
        }

        /** open a file of given filename for writing */
        public OutputStream writeFileStream(String filename) throws IOException {
            if (logger.isDebugEnabled())
                logger.debug("Creating " + filename);
            return DFS.create(new Path(filename));
        }

        public boolean mkdir(String filename) throws IOException {
            return DFS.mkdirs(new Path(filename));
        }

        public RandomDataOutput writeFileRandom(String filename) throws IOException {
            throw new IOException("HDFS does not support random writing");
        }

        public RandomDataInput openFileRandom(String filename) throws IOException {
            return new HadoopFSRandomAccessFile(DFS, filename);
        }

        public boolean delete(String filename) throws IOException {
            return DFS.delete(new Path(filename), true);
        }

        public boolean deleteOnExit(String filename) throws IOException {
            return DFS.deleteOnExit(new Path(filename));
        }

        public String[] list(String path) throws IOException {
            final FileStatus[] contents = DFS.listStatus(new Path(path));
            if (contents == null)
                throw new FileNotFoundException("Cannot list path " + path);
            final String[] names = new String[contents.length];
            for (int i = 0; i < contents.length; i++) {
                names[i] = contents[i].getPath().getName();
            }
            return names;
        }

        public String getParent(String path) throws IOException {
            return new Path(path).getParent().getName();
        }

        public boolean rename(String source, String destination) throws IOException {
            return DFS.rename(new Path(source), new Path(destination));
        }

        public boolean isDirectory(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).isDir();
        }

        public long length(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).getLen();
        }

        public boolean canWrite(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).getPermission().getUserAction().implies(FsAction.WRITE);
        }

        public boolean canRead(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).getPermission().getUserAction().implies(FsAction.READ);
        }
    };
    Files.addFileSystemCapability(terrierDFS);
}

From source file:org.terrier.utility.io.HadoopUtility.java

License:Mozilla Public License

protected static Path makeTemporaryFile(JobConf jobConf, String filename) throws IOException {
    final int randomKey = jobConf.getInt("terrier.tempfile.id", random.nextInt());
    jobConf.setInt("terrier.tempfile.id", randomKey);
    FileSystem defFS = FileSystem.get(jobConf);
    final Path tempFile = new Path(HADOOP_TMP_PATH + "/" + (randomKey) + "-" + filename);
    defFS.deleteOnExit(tempFile);
    return tempFile;
}
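
For illustration only, a hypothetical caller inside the same class could use this helper as in the short sketch below (the filename is an assumption; the real pattern appears in saveApplicationSetupToJob in the next listing). Because the helper already registered the path with deleteOnExit, whatever is written there is removed when the job's default FileSystem is closed.

    Path tempExample = makeTemporaryFile(jobConf, "example.properties");
    // makeTemporaryFile() marked tempExample with deleteOnExit(), so this file
    // is cleaned up automatically when the default FileSystem is closed.
    OutputStream out = FileSystem.get(jobConf).create(tempExample);
    out.close();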

From source file:org.terrier.utility.io.HadoopUtility.java

License:Mozilla Public License

protected static void saveApplicationSetupToJob(JobConf jobConf, boolean getFreshProperties) throws Exception {
    // Do we load a fresh properties File?
    //TODO fix, if necessary
    //if (getFreshProperties)
    //   loadApplicationSetup(new Path(ApplicationSetup.TERRIER_HOME));

    FileSystem remoteFS = FileSystem.get(jobConf);
    URI remoteFSURI = remoteFS.getUri();
    //make a copy of the current application setup properties, these may be amended
    //as some files are more globally accessible
    final Properties propertiesDuringJob = new Properties();
    Properties appProperties = ApplicationSetup.getProperties();
    for (Object _key : appProperties.keySet()) {
        String key = (String) _key;
        propertiesDuringJob.put(key, appProperties.get(key));
    }

    //the share folder is needed during indexing, save this on DFS
    if (Files.getFileSystemName(ApplicationSetup.TERRIER_SHARE).equals("local")) {
        Path tempTRShare = makeTemporaryFile(jobConf, "terrier.share");
        propertiesDuringJob.setProperty("terrier.share", remoteFSURI.resolve(tempTRShare.toUri()).toString());
        if (Files.exists(ApplicationSetup.TERRIER_SHARE)) {
            jobConf.set("terrier.share.copied", remoteFSURI.resolve(tempTRShare.toUri()).toString());
            logger.info("Copying terrier share/ directory (" + ApplicationSetup.TERRIER_SHARE
                    + ") to shared storage area (" + remoteFSURI.resolve(tempTRShare.toUri()).toString() + ")");
            FileUtil.copy(FileSystem.getLocal(jobConf), new Path(ApplicationSetup.TERRIER_SHARE), remoteFS,
                    tempTRShare, false, false, jobConf);
        } else {
            logger.warn(
                    "No terrier.share folder found at " + ApplicationSetup.TERRIER_SHARE + ", copying skipped");
        }
    }

    //copy the terrier.properties content over
    Path tempTRProperties = makeTemporaryFile(jobConf, "terrier.properties");
    logger.debug("Writing terrier properties out to DFS " + tempTRProperties.toString());
    OutputStream out = remoteFS.create(tempTRProperties);
    remoteFS.deleteOnExit(tempTRProperties);
    propertiesDuringJob.store(out, "Automatically generated by HadoopUtility.saveApplicationSetupToJob()");
    out.close();
    out = null;
    DistributedCache.addCacheFile(tempTRProperties.toUri().resolve(new URI("#terrier.properties")), jobConf);
    DistributedCache.createSymlink(jobConf);

    //copy the non-JVM system properties over as well
    Path tempSysProperties = makeTemporaryFile(jobConf, "system.properties");
    DataOutputStream dos = FileSystem.get(jobConf).create(tempSysProperties);
    logger.debug("Writing system properties out to DFS " + tempSysProperties.toString());
    for (Object _propertyKey : System.getProperties().keySet()) {
        String propertyKey = (String) _propertyKey;
        if (!startsWithAny(propertyKey, checkSystemProperties)) {
            dos.writeUTF(propertyKey);
            dos.writeUTF(System.getProperty(propertyKey));
        }
    }
    dos.writeUTF("FIN");
    dos.close();
    dos = null;
    DistributedCache.addCacheFile(tempSysProperties.toUri().resolve(new URI("#system.properties")), jobConf);
}

From source file:simsql.runtime.MRLoader.java

License:Apache License

public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {

    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs = null;

    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }

    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }

    // find a file name 
    String tempPath = null;
    if (inputPath.startsWith("hdfs:")) {
        tempPath = inputPath.replace("hdfs:", "");
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception e) {
            // ignore this.
        }

        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }

    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());

    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);

    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }

    conf.setStrings("simsql.loader.types", myStrings);

    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }

    // set the split size (number of mappers)
    long fSize = 0;
    if (inputPath.startsWith("hdfs")) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }

    FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks);
    FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks);

    // and the number of reducers
    job.setNumReduceTasks(numTasks);

    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);

    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);

    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);
    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }
    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);

    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);

    // run it
    Counters counters;
    try {
        job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }

    // now, delete all the empty part files
    try {

        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) { // this isn't disastrous 
    }

    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}