Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file.

Usage

From source file:com.hp.hpl.jena.sparql.algebra.MyOpVisitor.java

License:Open Source License

public void execute() {

    Configuration conf = new Configuration();
    FileSystem fs = null;
    try {//from  w ww  . j  a  v a  2  s . com
        fs = FileSystem.get(conf);
        Path out = new Path("output");
        if (!fs.exists(out)) {
            fs.delete(out, true);
            fs.mkdirs(out);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    Triple[] Q = new Triple[0];
    Q = opBGP.getPattern().getList().toArray(Q);
    Set<Var> vars = PatternVars.vars(query.getQueryPattern());

    JoinPlaner.setid(id);
    JoinPlaner.newVaRS(vars);
    try {
        JoinPlaner.form(Q);
        JoinPlaner.removeNonJoiningVaribles(Q);
        int i = 0;
        while (!JoinPlaner.isEmpty()) {
            String v = JoinPlaner.getNextJoin();
            System.out.println(v);
            i++;
        }
        if (i == 0) {
            Path outFile = new Path("output/Join_" + id + "_" + 0);
            OutputBuffer out = new OutputBuffer(outFile, fs);
            //if (fs.exists(outFile)) {
            //   fs.delete(outFile,true);
            //}
            //fs.create(outFile);
            QueryProcessor.executeSelect(Q[0], out, "P0");
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

License:Open Source License

/**
 * //  w ww  .  j  a  va  2s. co  m
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }

    OutputStream out = hdfs.create(merge, true);

    // write out the header first 
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS. 
 * The part files are created by CSV_WRITE MR job. 
 * //w  w w.  j  a v  a2s. com
 * This method is invoked from CP-write instruction.
 * 
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    partPaths[i] = contents[i].getPath();
                    numPartFiles++;
                }
            }
            Arrays.sort(partPaths);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * /*from  w  w  w  .j av  a2 s . c  o m*/
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile

        /*
         * TODO: Remove this roundabout way! 
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv 
         *              & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created. 
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        //boolean ret1 = 
        hdfs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = 
        hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = 
        hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = 
        hdfs.rename(srcFilePath, destFilePath);

        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {

        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data 

    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java

License:Open Source License

@Override
public void cleanupJob(JobContext context) throws IOException {
    JobConf conf = context.getJobConf();
    // do the clean up of temporary directory
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        FileSystem fs = outputPath.getFileSystem(conf);
        context.getProgressible().progress();
        if (fs.exists(outputPath))
            fs.delete(outputPath, true);
    }//  w  w  w .j a  va2s  .  c o m
}

From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java

License:Open Source License

@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get the mapping between index to output filename
    outputs = MRJobConfiguration.getOutputs(conf);

    //get temp task output path (compatible with hadoop1 and hadoop2)
    Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf);
    FileSystem fs = taskOutPath.getFileSystem(conf);
    if (!fs.exists(taskOutPath))
        throw new IOException("Task output path " + taskOutPath.toString() + "does not exist.");

    // Move the task outputs to their final places
    context.getProgressible().progress();
    moveFinalTaskOutputs(context, fs, taskOutPath);

    // Delete the temporary task-specific output directory
    if (!fs.delete(taskOutPath, true))
        LOG.debug(/*from   w w w.ja v a 2 s.  c  om*/
                "Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath);
}

From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java

License:Open Source License

/**
 * //from   w w  w . ja v a 2s  .  c o m
 * @param context
 * @param fs
 * @param file
 * @throws IOException
 */
private void moveFileToDestination(TaskAttemptContext context, FileSystem fs, Path file) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    //get output index and final destination
    String taskType = (conf.getBoolean(JobContext.TASK_ISMAP, true)) ? "m" : "r";
    String name = file.getName();
    int charIx = name.indexOf("-" + taskType + "-");
    int index = Integer.parseInt(name.substring(0, charIx));
    Path finalPath = new Path(outputs[index], file.getName());

    //move file from 'file' to 'finalPath'
    if (!fs.rename(file, finalPath)) {
        if (!fs.delete(finalPath, true))
            throw new IOException("Failed to delete earlier output " + finalPath + " for rename of " + file
                    + " in task " + attemptId);
        if (!fs.rename(file, finalPath))
            throw new IOException(
                    "Failed to save output " + finalPath + " for rename of " + file + " in task: " + attemptId);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java

License:Open Source License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param conf the job to sample/*from w  w  w  . j  ava2 s.  c om*/
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
* @throws IllegalAccessException 
* @throws InstantiationException 
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);
    //get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    //indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // take N samples from different parts of the input

    int totalcount = 0;
    for (int i = 0; i < samples; ++i) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                            + pair.getKey().getClass() + ":" + pair.getValue().getClass());

                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }

    if (totalcount == 0) //empty input files
        sampler.addValue(new DoubleWritable(0));

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    //note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    int index0 = -1, i = 0;
    boolean lessthan0 = true;
    for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
        writer.append(splitValue, nullValue);
        if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
            index0 = i;
            lessthan0 = false;
        }
        i++;
    }
    if (lessthan0)
        index0 = partitions - 1;
    writer.close();

    return index0;
}

From source file:com.ibm.bi.dml.runtime.transform.ApplyTfCSVMR.java

License:Open Source License

public static JobReturn runJob(String inputPath, String specPath, String mapsPath, String tmpPath,
        String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols,
        int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf job = new JobConf(ApplyTfCSVMR.class);
    job.setJobName("ApplyTfCSV");

    /* Setup MapReduce Job */
    job.setJarByClass(ApplyTfCSVMR.class);

    // set relevant classes
    job.setMapperClass(ApplyTfCSVMapper.class);
    job.setNumReduceTasks(0);/*from   ww w  .ja v  a  2 s .  c  o  m*/

    // Add transformation metadata file as well as partOffsetsFile to Distributed cache
    DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
    DistributedCache.createSymlink(job);

    Path cachefile = new Path(partOffsetsFile);
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);

    // set input and output properties
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInt("dfs.replication", replication);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    // delete outputPath, if exists already.
    Path outPath = new Path(outputPath);
    FileSystem fs = FileSystem.get(job);
    fs.delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);

    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    if (inputDataProperties.getNAStrings() != null)
        // Adding "dummy" string to handle the case of na_strings = ""
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC_FILE, specPath);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
    job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);
    job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
    job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    // Run the job
    RunningJob runjob = JobClient.runJob(job);

    // Since transform CSV produces part files w/ prefix transform-part-*,
    // delete all the "default" part-..... files
    deletePartFiles(fs, outPath);

    MatrixCharacteristics mc = new MatrixCharacteristics();
    return new JobReturn(new MatrixCharacteristics[] { mc }, runjob.isSuccessful());
}

From source file:com.ibm.bi.dml.runtime.transform.ApplyTfCSVMR.java

License:Open Source License

private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException {
    PathFilter filter = new PathFilter() {
        public boolean accept(Path file) {
            return file.getName().startsWith("part-");
        }//from w  ww  . j av a 2  s .c o  m
    };
    FileStatus[] list = fs.listStatus(path, filter);
    for (FileStatus stat : list) {
        fs.delete(stat.getPath(), false);
    }
}