List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
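Before the collected examples, here is a minimal, self-contained sketch of the basic call pattern (not taken from any of the source files below). It assumes a default Configuration and a hypothetical /tmp/example-output path, and passes recursive=true so that non-empty directories are removed as well.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // hypothetical path, used here only for illustration
        Path target = new Path("/tmp/example-output");

        // recursive=true deletes a directory and everything under it;
        // delete() returns false if nothing was removed
        if (fs.exists(target)) {
            boolean deleted = fs.delete(target, true);
            System.out.println("deleted: " + deleted);
        }
        fs.close();
    }
}

Most of the examples that follow use the same idiom: check exists(), delete the stale output (recursively for directories), then recreate or rewrite it.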
From source file:com.hp.hpl.jena.sparql.algebra.MyOpVisitor.java
License:Open Source License
public void execute() {
    Configuration conf = new Configuration();
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        Path out = new Path("output");
        // remove any previous output directory before recreating it
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        fs.mkdirs(out);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Triple[] Q = new Triple[0];
    Q = opBGP.getPattern().getList().toArray(Q);
    Set<Var> vars = PatternVars.vars(query.getQueryPattern());
    JoinPlaner.setid(id);
    JoinPlaner.newVaRS(vars);
    try {
        JoinPlaner.form(Q);
        JoinPlaner.removeNonJoiningVaribles(Q);
        int i = 0;
        while (!JoinPlaner.isEmpty()) {
            String v = JoinPlaner.getNextJoin();
            System.out.println(v);
            i++;
        }
        if (i == 0) {
            Path outFile = new Path("output/Join_" + id + "_" + 0);
            OutputBuffer out = new OutputBuffer(outFile, fs);
            //if (fs.exists(outFile)) {
            //    fs.delete(outFile, true);
            //}
            //fs.create(outFile);
            QueryProcessor.executeSelect(Q[0], out, "P0");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/**
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }
    OutputStream out = hdfs.create(merge, true);

    // write out the header first
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS.
 * The part files are created by CSV_WRITE MR job.
 *
 * This method is invoked from CP-write instruction.
 *
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    partPaths[i] = contents[i].getPath();
                    numPartFiles++;
                }
            }
            Arrays.sort(partPaths);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * @param srcFileName
 * @param destFileName
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile
        /*
         * TODO: Remove this roundabout way!
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv
         * & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created.
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        //boolean ret1 = hdfs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = hdfs.rename(srcFilePath, destFilePath);

        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true);  // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data
    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
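The delete/createNewFile/delete/rename sequence in the TODO above works around the fact that rename() does not create missing parent directories. A minimal sketch of the more direct alternative, assuming fs is an open FileSystem handle and reusing the hypothetical paths from the comment:

// hypothetical source and destination paths, for illustration only
Path src = new Path("/user/biadmin/csv/srcfile");
Path dest = new Path("/user/biadmin/csv/temp/out/file.csv");

if (fs.exists(dest))
    fs.delete(dest, true);       // remove any stale destination
fs.mkdirs(dest.getParent());     // create /user/biadmin/csv/temp/out if it is missing
fs.rename(src, dest);            // move the data into place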
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
@Override
public void cleanupJob(JobContext context) throws IOException {
    JobConf conf = context.getJobConf();

    // do the clean up of temporary directory
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        FileSystem fs = outputPath.getFileSystem(conf);
        context.getProgressible().progress();
        if (fs.exists(outputPath))
            fs.delete(outputPath, true);
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get the mapping between index to output filename
    outputs = MRJobConfiguration.getOutputs(conf);

    // get temp task output path (compatible with hadoop1 and hadoop2)
    Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf);
    FileSystem fs = taskOutPath.getFileSystem(conf);
    if (!fs.exists(taskOutPath))
        throw new IOException("Task output path " + taskOutPath.toString() + " does not exist.");

    // Move the task outputs to their final places
    context.getProgressible().progress();
    moveFinalTaskOutputs(context, fs, taskOutPath);

    // Delete the temporary task-specific output directory
    if (!fs.delete(taskOutPath, true))
        LOG.debug("Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath);
}
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
/**
 * @param context
 * @param fs
 * @param file
 * @throws IOException
 */
private void moveFileToDestination(TaskAttemptContext context, FileSystem fs, Path file) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get output index and final destination
    String taskType = (conf.getBoolean(JobContext.TASK_ISMAP, true)) ? "m" : "r";
    String name = file.getName();
    int charIx = name.indexOf("-" + taskType + "-");
    int index = Integer.parseInt(name.substring(0, charIx));
    Path finalPath = new Path(outputs[index], file.getName());

    // move file from 'file' to 'finalPath'
    if (!fs.rename(file, finalPath)) {
        if (!fs.delete(finalPath, true))
            throw new IOException("Failed to delete earlier output " + finalPath + " for rename of " + file
                    + " in task " + attemptId);
        if (!fs.rename(file, finalPath))
            throw new IOException("Failed to save output " + finalPath + " for rename of " + file
                    + " in task: " + attemptId);
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java
License:Open Source License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);

    // get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    // indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;

    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; ++i) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                            + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }

    if (totalcount == 0) // empty input files
        sampler.addValue(new DoubleWritable(0));

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    // note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    int index0 = -1, i = 0;
    boolean lessthan0 = true;
    for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
        writer.append(splitValue, nullValue);
        if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
            index0 = i;
            lessthan0 = false;
        }
        i++;
    }
    if (lessthan0)
        index0 = partitions - 1;
    writer.close();

    return index0;
}
From source file:com.ibm.bi.dml.runtime.transform.ApplyTfCSVMR.java
License:Open Source License
public static JobReturn runJob(String inputPath, String specPath, String mapsPath, String tmpPath,
        String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols,
        int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf job = new JobConf(ApplyTfCSVMR.class);
    job.setJobName("ApplyTfCSV");

    /* Setup MapReduce Job */
    job.setJarByClass(ApplyTfCSVMR.class);

    // set relevant classes
    job.setMapperClass(ApplyTfCSVMapper.class);
    job.setNumReduceTasks(0);

    // Add transformation metadata file as well as partOffsetsFile to Distributed cache
    DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
    DistributedCache.createSymlink(job);

    Path cachefile = new Path(partOffsetsFile);
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);

    // set input and output properties
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInt("dfs.replication", replication);

    FileInputFormat.addInputPath(job, new Path(inputPath));

    // delete outputPath, if exists already.
    Path outPath = new Path(outputPath);
    FileSystem fs = FileSystem.get(job);
    fs.delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);

    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    if (inputDataProperties.getNAStrings() != null)
        // Adding "dummy" string to handle the case of na_strings = ""
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC_FILE, specPath);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
    job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);
    job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
    job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);

    // turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    // Run the job
    RunningJob runjob = JobClient.runJob(job);

    // Since transform CSV produces part files w/ prefix transform-part-*,
    // delete all the "default" part-..... files
    deletePartFiles(fs, outPath);

    MatrixCharacteristics mc = new MatrixCharacteristics();
    return new JobReturn(new MatrixCharacteristics[] { mc }, runjob.isSuccessful());
}
From source file:com.ibm.bi.dml.runtime.transform.ApplyTfCSVMR.java
License:Open Source License
private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException {
    PathFilter filter = new PathFilter() {
        public boolean accept(Path file) {
            return file.getName().startsWith("part-");
        }
    };
    FileStatus[] list = fs.listStatus(path, filter);
    for (FileStatus stat : list) {
        fs.delete(stat.getPath(), false);
    }
}