Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file.

Usage

From source file:co.cask.cdap.data.hbase.HBase94Test.java

License:Apache License

/**
 * Builds a fresh {@link HRegion} for the given table and key range.
 *
 * <p>The region directory is {@code callingMethod} resolved under the
 * {@code HConstants.HBASE_DIR} setting of the configuration. If that directory
 * already exists it is deleted recursively before the region is created.
 *
 * @param tableName     name of the table the region belongs to
 * @param startKey      first key of the region
 * @param stopKey       end key of the region
 * @param callingMethod name used as the region directory under the HBase dir
 * @param conf          configuration to use; a new {@link Configuration} is created when {@code null}
 * @param families      column families to register on the table descriptor
 * @return the newly created region
 * @throws IOException if the existing region directory cannot be deleted, or on any file system error
 */
@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    Configuration effectiveConf = (conf != null) ? conf : new Configuration();

    HTableDescriptor descriptor = new HTableDescriptor(tableName);
    for (int i = 0; i < families.length; i++) {
        descriptor.addFamily(new HColumnDescriptor(families[i]));
    }
    HRegionInfo regionInfo = new HRegionInfo(descriptor.getName(), startKey, stopKey, false);

    Path regionDir = new Path(effectiveConf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(effectiveConf);
    // Start from a clean directory: leftovers from an earlier run must be removed first.
    if (fs.exists(regionDir) && !fs.delete(regionDir, true)) {
        throw new IOException("Failed delete of " + regionDir);
    }
    return HRegion.createHRegion(regionInfo, regionDir, effectiveConf, descriptor);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

/**
 * Recursively deletes {@code jobSpecificOutputPath} using the file system
 * resolved from the job's configuration. The boolean result of the delete
 * is not inspected.
 *
 * @param context the job context supplying the configuration
 * @throws IOException if the file system cannot be obtained or the delete fails with an error
 */
@Override
public void cleanupJob(JobContext context) throws IOException {
    jobSpecificOutputPath.getFileSystem(context.getConfiguration()).delete(jobSpecificOutputPath, true);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

/**
 * Moves {@code from} into {@code to}. On a name conflict the entry coming
 * from {@code from} wins: an existing file (or a non-directory standing where
 * a directory is being moved) is deleted and replaced. When both sides are
 * directories their contents are merged recursively.
 *
 * @param fs the file system to operate on
 * @param from status of the path the data is coming from
 * @param to the path the data is going to
 * @throws IOException if a delete or rename reports failure, or on any file system error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        // A plain file simply replaces whatever currently sits at the target.
        if (fs.exists(to) && !fs.delete(to, true)) {
            throw new IOException("Failed to delete " + to);
        }
        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
        return;
    }

    if (!from.isDirectory()) {
        // Neither a file nor a directory: nothing to do.
        return;
    }

    if (fs.exists(to)) {
        if (fs.getFileStatus(to).isDirectory()) {
            // Both sides are directories: merge child by child.
            for (FileStatus child : fs.listStatus(from.getPath())) {
                mergePaths(fs, child, new Path(to, child.getPath().getName()));
            }
            return;
        }
        // The target exists but is not a directory: remove it so the directory can take its place.
        if (!fs.delete(to, true)) {
            throw new IOException("Failed to delete " + to);
        }
    }

    // The target is absent (or was just removed): a rename is enough.
    if (!fs.rename(from.getPath(), to)) {
        throw new IOException("Failed to rename " + from + " to " + to);
    }
}

From source file:co.cask.hydrator.plugin.batch.action.FileAction.java

License:Apache License

/**
 * Applies the configured file action ("delete", "move" or "archive") to the
 * configured path. When the path is a directory the action is applied to each
 * of its direct children; otherwise to the path itself. If a file-name pattern
 * is configured, only entries whose name matches it are processed.
 *
 * <p>The action name is matched case-insensitively using {@link java.util.Locale#ROOT},
 * so the match does not depend on the JVM's default locale. A delete or move
 * that the file system reports as unsuccessful is logged as a warning and
 * processing continues with the next path.
 *
 * @param context the batch action context
 * @throws Exception on any file system or I/O error
 */
@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();
    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    // Compile the optional regex used to filter file names.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    // Locale.ROOT keeps the lower-casing stable; e.g. under a Turkish default locale
    // "ARCHIVE".toLowerCase() would yield a dotless i and never match "archive".
    switch (config.action.toLowerCase(java.util.Locale.ROOT)) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                if (!fileSystem.delete(path, true)) {
                    LOG.warn("Failed to delete " + path);
                }
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                if (!fileSystem.rename(path, targetFileMovePath)) {
                    LOG.warn("Failed to move " + path + " to " + targetFileMovePath);
                }
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                // Write "<name>.zip" with a single entry into the target folder, then drop the source.
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                if (!fileSystem.delete(path, true)) {
                    LOG.warn("Failed to delete " + path + " after archiving");
                }
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}

From source file:colossal.pipe.ColFile.java

License:Apache License

/**
 * Clears the output location denoted by {@code path}.
 *
 * <p>If the location exists, every direct child directory must end with
 * {@code /_logs} or {@code /_temporary}; any other child directory aborts the
 * call. If the location does not exist it is created. In both cases the
 * location is then deleted recursively.
 *
 * @param conf configuration used to resolve the file system
 * @throws IllegalArgumentException if the location contains an unexpected child directory
 * @throws RuntimeException wrapping any {@link IOException} from the file system
 */
public void clearAndPrepareOutput(Configuration conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        if (!fs.exists(dfsPath)) {
            fs.mkdirs(dfsPath);
        } else {
            for (FileStatus child : fs.listStatus(dfsPath)) {
                if (!child.isDir()) {
                    continue;
                }
                String childLocation = child.getPath().toString();
                boolean housekeepingDir = childLocation.endsWith("/_logs")
                        || childLocation.endsWith("/_temporary");
                if (!housekeepingDir) {
                    throw new IllegalArgumentException(
                            "Trying to overwrite directory with child directories: " + path);
                }
            }
        }
        fs.delete(dfsPath, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.alexholmes.hadooputils.sort.SortInputSampler.java

License:Apache License

/**
 * Samples the job's input and writes the partition file read by
 * {@code TotalOrderPartitioner}.
 *
 * <p>The input format configured on the job is used for sampling. The samples
 * are sorted with the job's output key comparator, and
 * {@code numReduceTasks - 1} split points are appended to a sequence file at
 * the partition file location. An existing partition file is deleted first.
 *
 * @param job     the job whose input is sampled and whose partition file is written
 * @param sampler the sampler used to draw keys from the input
 * @param <K>     key type
 * @param <V>     value type
 * @throws IOException on any file system or I/O error
 */
public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException {
    Configuration conf = job;
    // Use the input format defined in the job. NOT, the one provided by
    // the parent class's writePartitionFile() method, which will be a plain
    // TextInputFormat, by default
    final InputFormat inf = job.getInputFormat();
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    // Close the writer even when appending fails, so the underlying stream is not leaked.
    try {
        NullWritable nullValue = NullWritable.get();
        float stepSize = samples.length / (float) numPartitions;
        int last = -1;
        for (int i = 1; i < numPartitions; ++i) {
            int k = Math.round(stepSize * i);
            // Skip forward past samples equal to the previously written split point.
            while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
                ++k;
            }
            writer.append(samples[k], nullValue);
            last = k;
        }
    } finally {
        writer.close();
    }
}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

/**
 * Copies one source file to its destination via a staging file.
 *
 * <p>Steps, in order: optionally run the configured work script on the source;
 * compute the destination path (appending the codec's default extension when a
 * codec is configured); create the destination and staging parent directories
 * if missing; copy the bytes to the staging file (optionally compressing and
 * computing a CRC32); check file sizes when no codec is used; optionally verify
 * the checksum; replace any existing destination file by renaming the staging
 * file; optionally (re)build the LZOP index; finally report completion.
 *
 * <p>Any {@link Throwable} raised along the way is logged, the staging file is
 * removed if present, and the failure is reported through
 * {@code fileSystemManager.fileCopyError}; it is not rethrown.
 *
 * @param srcFileStatus status of the source file to copy
 * @throws IOException          declared for callers; see the note above on how failures are handled
 * @throws InterruptedException declared for callers; see the note above on how failures are handled
 */
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFs.getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        // the batch id is whatever follows the last delimiter in the source path
        String srcFileName = srcFile.toString();
        String batchId = srcFileName.substring(srcFileName.lastIndexOf(filenameBatchidDelimiter) + 1,
                srcFileName.length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                // report the staging directory here, not the target directory
                throw new IOException(
                        "event#Failed to create staging directory: " + destStagingParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        // sizes can only be compared when the copy was not compressed
        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        // an existing destination file is removed before the staging file is renamed onto it
        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        // NOTE(review): this also catches InterruptedException without restoring the
        // thread's interrupt flag - confirm whether the caller relies on the flag.
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists; stagingFile is still null when the
        // failure happened before it was computed, in which case there is nothing to clean up
        //
        try {
            if (destFs != null && stagingFile != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }

}

From source file:com.alibaba.jstorm.hdfs.spout.DirLock.java

License:Apache License

/**
 * Attempts to take over an existing directory lock file by deleting and
 * recreating it.
 *
 * <p>On a {@link DistributedFileSystem} the lease on the lock file is
 * recovered first; if that does not succeed the attempt is abandoned.
 *
 * @param fs          the file system holding the lock file
 * @param dirLockFile path of the directory lock file
 * @return a {@link DirLock} for the lock file once it has been deleted here, or
 *         {@code null} if the lease could not be recovered or the delete returned {@code false}
 * @throws IOException on any file system error
 */
private static DirLock takeOwnership(FileSystem fs, Path dirLockFile) throws IOException {
    if (fs instanceof DistributedFileSystem) {
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        boolean leaseRecovered = dfs.recoverLease(dirLockFile);
        if (!leaseRecovered) {
            LOG.warn("Unable to recover lease on dir lock file " + dirLockFile
                    + " right now. Cannot transfer ownership. Will need to try later.");
            return null;
        }
    }

    // delete and recreate lock file; delete returns false if somebody else
    // already deleted it (to take ownership)
    boolean deletedByUs = fs.delete(dirLockFile, false);
    if (!deletedByUs) {
        return null;
    }

    FSDataOutputStream recreated = HdfsUtils.tryCreateFile(fs, dirLockFile);
    if (recreated != null) {
        recreated.close();
    }
    return new DirLock(fs, dirLockFile);
}

From source file:com.alibaba.jstorm.hdfs.spout.FileLock.java

License:Apache License

/**
 * checks if lockFile is older than 'olderThan' UTC time by examining the modification time
 * on file and (if necessary) the timestamp in last log entry in the file. If its stale, then
 * returns the last log entry, else returns null.
 * @param fs/*from www. ja  v a 2 s.  c  om*/
 * @param lockFile
 * @param olderThan  time (millis) in UTC.
 * @return the last entry in the file if its too old. null if last entry is not too old
 * @throws IOException
 */
public static LogEntry getLastEntryIfStale(FileSystem fs, Path lockFile, long olderThan) throws IOException {
    long modifiedTime = fs.getFileStatus(lockFile).getModificationTime();
    if (modifiedTime <= olderThan) { // look
        //Impt: HDFS timestamp may not reflect recent appends, so we double check the
        // timestamp in last line of file to see when the last update was made
        LogEntry lastEntry = getLastEntry(fs, lockFile);
        if (lastEntry == null) {
            LOG.warn("Empty lock file found. Deleting it. {}", lockFile);
            try {
                if (!fs.delete(lockFile, false))
                    throw new IOException("Empty lock file deletion failed");
            } catch (Exception e) {
                LOG.error("Unable to delete empty lock file " + lockFile, e);
            }
        }
        if (lastEntry.eventTime <= olderThan)
            return lastEntry;
    }
    return null;
}

From source file:com.anhth12.lambda.ml.MLUpdate.java

/**
 * Builds candidate models for the supplied data, promotes the best one into
 * the model directory and publishes it on the update topic.
 *
 * <p>Candidates are built under {@code <modelDir>/.temporary/<timestamp>}.
 * The best candidate, if any, is renamed to {@code <modelDir>/<timestamp>} and
 * the candidates directory is then deleted recursively. If the promoted
 * directory contains {@code MODEL_FILE_NAME}, that file is read as
 * gzip-compressed PMML, sent to the topic under the key "MODEL", and
 * {@code publishAdditionalModelData} is invoked.
 *
 * @param sparkContext       Spark context; also supplies the Hadoop configuration
 * @param timestamp          not used by this method
 * @param newKeyMessageData  new data; must not be {@code null}
 * @param pastKeyMessageData historical data; may be {@code null}
 * @param modelDirString     directory under which models are stored
 * @param modelUpdateTopic   topic to which the model is published
 * @throws IOException          on any file system or I/O error
 * @throws InterruptedException declared for implementations of the overridden method
 */
@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<String, M> newKeyMessageData,
        JavaPairRDD<String, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {

    Preconditions.checkNotNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    // Cache each data set and run a no-op over every partition.
    if (newData != null) {
        newData.cache();
        newData.foreachPartition(Functions.<Iterator<M>>noOp());
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(Functions.<Iterator<M>>noOp());
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParamValues();

    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);

    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candidatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));
    fs.mkdirs(candidatesPath);

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candidatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        fs.rename(bestCandidatePath, finalPath);
    }

    // The candidates directory is temporary; remove it whether or not a model was promoted.
    fs.delete(candidatesPath, true);

    Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);

    if (fs.exists(bestModelPath)) {
        PMML bestModel;
        // Read the model file itself; finalPath is only its parent directory.
        try (InputStream in = new GZIPInputStream(fs.open(bestModelPath), 1 << 16)) {
            bestModel = PMMLUtils.read(in);
        }

        modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
        // Hand over the promoted model directory: the candidates directory was deleted above.
        publishAdditionalModelData(sparkContext, bestModel, newData, pastData, finalPath, modelUpdateTopic);
    }

    if (newData != null) {
        newData.unpersist();
    }

    if (pastData != null) {
        pastData.unpersist();
    }

}