List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
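The first argument is the path to remove and the second flag controls whether a non-empty directory is deleted recursively; the call returns false if the path did not exist. Before the collected examples below, here is a minimal sketch of the typical call pattern; the /tmp/example-output path and the default Configuration are illustrative assumptions, not taken from any of the source files listed.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        // hypothetical target path, used only to illustrate the call
        Path target = new Path("/tmp/example-output");
        FileSystem fs = FileSystem.get(new Configuration());
        // recursive = true removes a directory and everything under it;
        // the return value reports whether anything was actually deleted
        if (fs.exists(target)) {
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted " + target + ": " + deleted);
        }
    }
}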
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
/**
 * Gets the truncate() call using reflection.
 *
 * <p>Note: This code comes from Flume
 */
private Method reflectTruncate(FileSystem fs) {
    Method m = null;
    if (fs != null) {
        Class<?> fsClass = fs.getClass();
        try {
            m = fsClass.getMethod("truncate", Path.class, long.class);
        } catch (NoSuchMethodException ex) {
            LOG.debug("Truncate not found. Will write a file with suffix '{}' "
                    + " and prefix '{}' to specify how many bytes in a bucket are valid.",
                    validLengthSuffix, validLengthPrefix);
            return null;
        }

        // verify that truncate actually works
        FSDataOutputStream outputStream;
        Path testPath = new Path(UUID.randomUUID().toString());
        try {
            outputStream = fs.create(testPath);
            outputStream.writeUTF("hello");
            outputStream.close();
        } catch (IOException e) {
            LOG.error("Could not create file for checking if truncate works.", e);
            throw new RuntimeException("Could not create file for checking if truncate works.", e);
        }

        try {
            m.invoke(fs, testPath, 2);
        } catch (IllegalAccessException | InvocationTargetException e) {
            LOG.debug("Truncate is not supported.", e);
            m = null;
        }

        try {
            fs.delete(testPath, false);
        } catch (IOException e) {
            LOG.error("Could not delete truncate test file.", e);
            throw new RuntimeException("Could not delete truncate test file.", e);
        }
    }
    return m;
}
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override
public void restoreState(State<T> state) {
    this.state = state;

    FileSystem fs;
    try {
        fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    for (BucketState<T> bucketState : state.bucketStates.values()) {
        // we can clean all the pending files since they were renamed to final files
        // after this checkpoint was successful
        bucketState.pendingFiles.clear();

        if (bucketState.currentFile != null) {
            // We were writing to a file when the last checkpoint occurred. This file can either
            // be still in-progress or became a pending file at some point after the checkpoint.
            // Either way, we have to truncate it back to a valid state (or write a .valid-length
            // file that specifies up to which length it is valid) and rename it to the final name
            // before starting a new bucket file.
            Path partPath = new Path(bucketState.currentFile);
            try {
                Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                        .suffix(pendingSuffix);
                Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                        .suffix(inProgressSuffix);

                if (fs.exists(partPendingPath)) {
                    LOG.debug("In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                            partPath);
                    // has been moved to pending in the meantime, rename to final location
                    fs.rename(partPendingPath, partPath);
                } else if (fs.exists(partInProgressPath)) {
                    LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                    // it was still in progress, rename to final path
                    fs.rename(partInProgressPath, partPath);
                } else if (fs.exists(partPath)) {
                    LOG.debug("In-progress file {} was already moved to final location {}.",
                            bucketState.currentFile, partPath);
                } else {
                    LOG.debug("In-progress file {} was neither moved to pending nor is still in progress. Possibly, "
                            + "it was moved to final location by a previous snapshot restore",
                            bucketState.currentFile);
                }

                refTruncate = reflectTruncate(fs);
                // truncate it or write a ".valid-length" file to specify up to which point it is valid
                if (refTruncate != null) {
                    LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                    // someone else might still hold the lease from a previous try, we are
                    // recovering, after all ...
                    if (fs instanceof DistributedFileSystem) {
                        DistributedFileSystem dfs = (DistributedFileSystem) fs;
                        LOG.debug("Trying to recover file lease {}", partPath);
                        dfs.recoverLease(partPath);
                        boolean isclosed = dfs.isFileClosed(partPath);
                        StopWatch sw = new StopWatch();
                        sw.start();
                        while (!isclosed) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            isclosed = dfs.isFileClosed(partPath);
                        }
                    }

                    Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                            bucketState.currentFileValidLength);
                    if (!truncated) {
                        LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                        // we must wait for the asynchronous truncate operation to complete
                        StopWatch sw = new StopWatch();
                        sw.start();
                        long newLen = fs.getFileStatus(partPath).getLen();
                        while (newLen != bucketState.currentFileValidLength) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            newLen = fs.getFileStatus(partPath).getLen();
                        }
                        if (newLen != bucketState.currentFileValidLength) {
                            throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                    + bucketState.currentFileValidLength + " is " + newLen + ".");
                        }
                    }
                } else {
                    LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                            bucketState.currentFileValidLength);
                    Path validLengthFilePath = new Path(partPath.getParent(),
                            validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                    if (!fs.exists(validLengthFilePath)) {
                        FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                        lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                        lengthFileOut.close();
                    }
                }

                // Now that we've restored the bucket to a valid state, reset the current file info
                bucketState.currentFile = null;
                bucketState.currentFileValidLength = -1;
            } catch (IOException e) {
                LOG.error("Error while restoring BucketingSink state.", e);
                throw new RuntimeException("Error while restoring BucketingSink state.", e);
            } catch (InvocationTargetException | IllegalAccessException e) {
                LOG.error("Could not invoke truncate.", e);
                throw new RuntimeException("Could not invoke truncate.", e);
            }
        }

        LOG.debug("Clearing pending/in-progress files.");

        // Move files that are confirmed by a checkpoint but did not get moved to final location
        // because the checkpoint notification did not happen before a failure
        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        LOG.debug("Moving pending files to final location on restore.");
        for (Long pastCheckpointId : pastCheckpointIds) {
            // All the pending files are buckets that have been completed but are waiting to be renamed
            // to their final name
            for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                Path finalPath = new Path(filename);
                Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                        .suffix(pendingSuffix);
                try {
                    if (fs.exists(pendingPath)) {
                        LOG.debug("(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                                pendingPath, pastCheckpointId);
                        fs.rename(pendingPath, finalPath);
                    }
                } catch (IOException e) {
                    LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                            pendingPath, finalPath, e);
                    throw new RuntimeException(
                            "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
                }
            }
        }

        synchronized (bucketState.pendingFilesPerCheckpoint) {
            bucketState.pendingFilesPerCheckpoint.clear();
        }
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    // delete pending files
    try {
        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}
From source file:com.mvdb.etl.actions.ActionUtils.java
License:Apache License
public static void writeStringToHdfsFile(String str, String hdfsFile) throws IOException {
    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);

    Path hdfsFilePath = new Path(hdfsFile);
    if (hdfsFileSystem.exists(hdfsFilePath)) {
        boolean deleteSuccess = hdfsFileSystem.delete(hdfsFilePath, true);
        if (deleteSuccess == false) {
            throw new RuntimeException("Unable to delete " + hdfsFilePath.toString());
        }
    }
    if (hdfsFileSystem.exists(hdfsFilePath)) {
        throw new RuntimeException("Output " + hdfsFilePath + " already exists");
    }
    logger.info("Copy " + str + " into " + hdfsFilePath.toString());
    FSDataOutputStream out = hdfsFileSystem.create(hdfsFilePath);
    byte[] bytes = str.getBytes();
    out.write(bytes, 0, bytes.length);
    out.close();
}
From source file:com.mvdb.etl.actions.ActionUtils.java
License:Apache License
public static void copyLocalDirectoryToHdfsDirectory(String localDirectory, String hdfsDirectory)
        throws Throwable {
    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);
    FileSystem localFileSystem = FileSystem.get(new org.apache.hadoop.conf.Configuration());

    Path localDirectoryPath = new Path(localDirectory);
    Path hdfsDirectoryPath = new Path(hdfsDirectory);

    if (hdfsFileSystem.exists(hdfsDirectoryPath)) {
        boolean deleteSuccess = hdfsFileSystem.delete(hdfsDirectoryPath, true);
        if (deleteSuccess == false) {
            throw new RuntimeException("Unable to delete " + hdfsDirectoryPath.toString());
        }
    }
    if (!localFileSystem.exists(localDirectoryPath)) {
        throw new RuntimeException("Input directory " + localDirectoryPath + " not found");
    }
    FileStatus fileStatus1 = localFileSystem.getFileStatus(localDirectoryPath);
    if (!fileStatus1.isDir()) {
        throw new RuntimeException("Input " + localDirectoryPath + " should be a directory");
    }
    if (hdfsFileSystem.exists(hdfsDirectoryPath)) {
        throw new RuntimeException("Output " + hdfsDirectoryPath + " already exists");
    }
    logger.info("Attempting Copy " + localDirectoryPath.toString() + " to " + hdfsDirectoryPath.toString());
    FileUtil.copy(localFileSystem, localDirectoryPath, hdfsFileSystem, hdfsDirectoryPath, false, conf);
    logger.info("Completed Copy " + localDirectoryPath.toString() + " to " + hdfsDirectoryPath.toString());
}
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ActionUtils.setUpInitFileProperty();
    // LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
    // StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Also add lastMergedTimeStamp and mergeUptoTimestamp and passive db name which would be mv1 or mv2
    if (otherArgs.length != 3) {
        System.err.println("Usage: versionmerge <customer-directory>");
        System.exit(2);
    }
    // Example: file:/home/umesh/.mvdb/etl/data/alpha
    // Example: hdfs://localhost:9000/data/alpha
    String customerDirectory = otherArgs[0];
    String lastMergedDirName = otherArgs[1];
    String lastCopiedDirName = otherArgs[2];

    org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration();
    // conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf1);

    Path topPath = new Path(customerDirectory);

    // Clean scratch db
    Path passiveDbPath = new Path(topPath, "db/mv1");
    Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000));
    if (hdfsFileSystem.exists(tempDbPath)) {
        boolean success = hdfsFileSystem.delete(tempDbPath, true);
        if (success == false) {
            System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString()));
            System.exit(1);
        }
    }

    // last three parameters are hardcoded and the nulls must be replaced later after changing input parameters.
    Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null);
    Set<String> tableNameSet = new HashSet<String>();
    for (Path path : inputPaths) {
        tableNameSet.add(path.getName());
    }

    Job job = new Job(conf, "versionmerge");
    job.setJarByClass(VersionMerge.class);
    job.setMapperClass(VersionMergeMapper.class);
    job.setReducerClass(VersionMergeReducer.class);
    job.setMapOutputKeyClass(MergeKey.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    String lastDirName = null;
    if (inputPaths != null && inputPaths.length > 1) {
        lastDirName = inputPaths[(inputPaths.length) - 2].getParent().getName();
    }
    for (Path inputPath : inputPaths) {
        FileInputFormat.addInputPath(job, inputPath);
    }
    FileOutputFormat.setOutputPath(job, tempDbPath);

    for (String table : tableNameSet) {
        if (table.endsWith(".dat") == false) {
            continue;
        }
        table = table.replaceAll("-", "");
        table = table.replaceAll(".dat", "");
        MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class,
                BytesWritable.class);
    }

    boolean success = job.waitForCompletion(true);
    System.out.println("Success:" + success);
    System.out.println(ManagementFactory.getRuntimeMXBean().getName());
    if (success && lastDirName != null) {
        ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(),
                ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName);
    }
    // hdfsFileSystem.delete(passiveDbPath, true);
    // hdfsFileSystem.rename(tempDbPath, passiveDbPath);
    System.exit(success ? 0 : 1);
}
From source file:com.mycompany.hadooptrain.WordCount.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Path inputPath = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Create configuration
    Configuration conf = new Configuration(true);

    // Create job
    Job job = new Job(conf, "WordCount");
    job.setJarByClass(WordCountMapper.class);

    // Setup MapReduce
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir))
        hdfs.delete(outputDir, true);

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    System.exit(code);
}
From source file:com.mycompany.keywordsearch.KeywordSearch.java
private static void clearOutput(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
From source file:com.mycompany.mavenpails2.PailMove.java
public static void setApplicationConf() throws IOException {
    Map conf = new HashMap();
    String sers = "backtype.hadoop.ThriftSerialization,org.apache.hadoop.io.serializer.WritableSerialization";
    conf.put("io.serializations", sers);
    Api.setApplicationConf(conf);

    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path(TEMP_DIR), true);
    fs.mkdirs(new Path(TEMP_DIR));

    /*
    Configuration conf2 = new Configuration();
    FileSystem fs = FileSystem.get(conf2);
    fs.delete(new Path(TEMP_DIR), true);
    fs.mkdirs(new Path(TEMP_DIR));
    */
}
From source file:com.mycompany.mavenproject1.App.java
public static void main(String[] args) throws IOException {
    // give time to attach debugger
    try {
        Thread.sleep(8000);
    } catch (InterruptedException ex) {
        Logger.getLogger(App.class.getName()).log(Level.SEVERE, null, ex);
    }

    JobConf conf = new JobConf(App.class);

    // purge existing output file
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true); // delete file, true for recursive

    conf.setJobName("wordcount");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(WholeFileInputFormat.class);
    // conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:com.nearinfinity.blur.mapreduce.BlurReducer.java
License:Apache License
protected void remove(Path directoryPath) throws IOException {
    FileSystem fileSystem = FileSystem.get(directoryPath.toUri(), _configuration);
    fileSystem.delete(directoryPath, true);
}