Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem.delete, collected from open-source projects.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;


Document

Delete a file. If f refers to a directory and recursive is true, the directory and all of its contents are deleted; if recursive is false and the directory is not empty, an IOException is thrown. For a regular file the recursive flag may be either value. The method returns true if the delete succeeded and false otherwise (for example, when the path does not exist).
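
Before the project examples below, here is a minimal, self-contained sketch of the two common call patterns. The /tmp/example paths and the default Configuration are illustrative assumptions only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example/data.txt"); // hypothetical file
        Path dir = new Path("/tmp/example");           // hypothetical directory

        FileSystem fs = file.getFileSystem(conf);

        // recursive=false: deletes a single file (or an empty directory)
        boolean fileDeleted = fs.delete(file, false);

        // recursive=true: deletes a directory together with all of its contents
        boolean dirDeleted = fs.delete(dir, true);

        // delete returns false rather than throwing when the path does not exist,
        // so callers usually check the return value.
        System.out.println("file deleted: " + fileDeleted + ", dir deleted: " + dirDeleted);
    }
}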

Usage

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

public boolean cleanMarkerFile(String hiveMarkerPath) {
    LOG.debug("cleaning up hiveMarker: " + hiveMarkerPath);
    FileSystem localHdfs;
    Path deletePath = new Path(hiveMarkerPath);
    try {
        localHdfs = deletePath.getFileSystem(conf);
        if (localHdfs.delete(deletePath, false)) {
            LOG.debug("hiveMarker deleted successfully: " + hiveMarkerPath);
            return true;
        } else {
            LOG.error("error deleting hive marker: " + hiveMarkerPath);
        }
    } catch (IOException e) {
        LOG.error("Error deleting hiveMarker: " + e.getMessage());
    }
    return false;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);

        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.error("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));

            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);

            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }

        hdfs.close();
        LOG.error("Written file: " + hiveOutputLocation);

        //lets start the purge process, delete all files except the merged file
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.error("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }

    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
    }
    LOG.error("mergeFiles Done merging files");
    return false;
}

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.REMOVEHandler.java

License:Apache License

@Override
protected REMOVEResponse doHandle(NFS4Handler server, Session session, REMOVERequest request)
        throws NFS4Exception, IOException {
    if (session.getCurrentFileHandle() == null) {
        throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
    }
    if ("".equals(request.getName())) {
        throw new NFS4Exception(NFS4ERR_INVAL);
    }
    Path parentPath = server.getPath(session.getCurrentFileHandle());
    Path path = new Path(parentPath, request.getName());
    FileSystem fs = session.getFileSystem();
    if (!fs.exists(path)) {
        throw new NFS4Exception(NFS4ERR_NOENT);
    }
    REMOVEResponse response = createResponse();
    ChangeInfo changeInfo = new ChangeInfo();
    FileStatus parentStatus = fs.getFileStatus(parentPath);
    ChangeID changeIDBefore = new ChangeID();
    changeIDBefore.setChangeID(parentStatus.getModificationTime());
    changeInfo.setChangeIDBefore(changeIDBefore);

    fs.delete(path, false);

    parentStatus = fs.getFileStatus(parentPath);
    ChangeID changeIDAfter = new ChangeID();
    changeIDAfter.setChangeID(parentStatus.getModificationTime());
    changeInfo.setChangeIDAfter(changeIDAfter);
    changeInfo.setAtomic(true);
    response.setChangeInfo(changeInfo);
    response.setStatus(NFS4_OK);
    return response;
}

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.state.HDFSState.java

License:Apache License

/**
 * Deletes a file from fs. If the file is open for writing,
 * the file will not be deleted.
 * @param fs the file system to delete from
 * @param path the path to delete
 * @return true if the file was deleted; false if it is open for writing or the underlying delete failed
 * @throws IOException
 */
public synchronized boolean delete(FileSystem fs, Path path) throws IOException {
    FileHandle fileHandle = mFileHandleINodeMap.getFileHandleByPath(realPath(path));
    HDFSFile hdfsFile = mOpenFilesMap.get(fileHandle);
    if ((hdfsFile != null) && hdfsFile.isOpenForWrite()) {
        return false;
    }
    return fs.delete(path, false);
}

From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java

License:Open Source License

private void testDelete() throws Exception {
    Path foo = new Path(getHadoopTestDir(), "foo");
    Path bar = new Path(getHadoopTestDir(), "bar");
    Path foe = new Path(getHadoopTestDir(), "foe");
    FileSystem fs = FileSystem.get(getHadoopConf());
    fs.mkdirs(foo);
    fs.mkdirs(new Path(bar, "a"));
    fs.mkdirs(foe);

    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    FileSystem hoopFs = FileSystem.get(getJettyURL().toURI(), conf);
    Assert.assertTrue(hoopFs.delete(new Path(foo.toUri().getPath()), false));
    Assert.assertFalse(fs.exists(foo));
    try {
        hoopFs.delete(new Path(bar.toUri().getPath()), false);
        Assert.fail();
    } catch (IOException ex) {
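        // expected: bar is a non-empty directory, so a non-recursive delete must fail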
    } catch (Exception ex) {
        Assert.fail();
    }
    Assert.assertTrue(fs.exists(bar));
    Assert.assertTrue(hoopFs.delete(new Path(bar.toUri().getPath()), true));
    Assert.assertFalse(fs.exists(bar));

    Assert.assertTrue(fs.exists(foe));
    Assert.assertTrue(hoopFs.delete(foe));
    Assert.assertFalse(fs.exists(foe));

    hoopFs.close();
    fs.close();
}

From source file:com.cloudera.hoop.fs.FSDelete.java

License:Open Source License

/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return <code>true</code> if the delete operation was successful,
 * <code>false</code> otherwise.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public JSONObject execute(FileSystem fs) throws IOException {
    boolean deleted = fs.delete(path, recursive);
    return FSUtils.toJSON("delete", deleted);
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Performs a non-recursive delete of all visible (non-hidden) files in a given
 * directory. Returns the number of files deleted as part of this operation.
 */
public static int deleteAllVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFilesDeleted = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        // Only delete files that are not hidden.
        if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
            LOG.debug("Removing: " + fStatus.getPath());
            fs.delete(fStatus.getPath(), false);
            ++numFilesDeleted;
        }
    }
    return numFilesDeleted;
}

From source file:com.cloudera.oryx.lambda.DeleteOldDataFn.java

License:Open Source License

@Override
public void call(T ignored) throws IOException {
    Path dataDirPath = new Path(dataDirString + "/*");
    FileSystem fs = FileSystem.get(dataDirPath.toUri(), hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(dataDirPath);
    if (inputPathStatuses != null) {
        long oldestTimeAllowed = System.currentTimeMillis()
                - TimeUnit.MILLISECONDS.convert(maxAgeHours, TimeUnit.HOURS);
        Arrays.stream(inputPathStatuses).filter(FileStatus::isDirectory).map(FileStatus::getPath)
                .filter(subdir -> {
                    Matcher m = dirTimestampPattern.matcher(subdir.getName());
                    return m.find() && Long.parseLong(m.group(1)) < oldestTimeAllowed;
                }).forEach(subdir -> {
                    log.info("Deleting old data at {}", subdir);
                    try {
                        fs.delete(subdir, true);
                    } catch (IOException e) {
                        log.warn("Unable to delete {}; continuing", subdir, e);
                    }
                });
    }
}

From source file:com.cloudera.oryx.ml.MLUpdate.java

License:Open Source License

@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<Object, M> newKeyMessageData,
        JavaPairRDD<Object, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {

    Objects.requireNonNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        // This forces caching of the RDD. This shouldn't be necessary but we see some freezes
        // when many workers try to materialize the RDDs at once. Hence the workaround.
        newData.foreachPartition(p -> {
        });
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(p -> {
        });
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParameterValues();
    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);
    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candidatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));

    FileSystem fs = FileSystem.get(modelDir.toUri(), sparkContext.hadoopConfiguration());
    fs.mkdirs(candidatesPath);

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candidatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        // Move best model into place
        fs.rename(bestCandidatePath, finalPath);
    }
    // Then delete everything else
    fs.delete(candidatesPath, true);

    if (modelUpdateTopic == null) {
        log.info("No update topic configured, not publishing models to a topic");
    } else {
        // Push PMML model onto update topic, if it exists
        Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);
        if (fs.exists(bestModelPath)) {
            FileStatus bestModelPathFS = fs.getFileStatus(bestModelPath);
            PMML bestModel = null;
            boolean modelNeededForUpdates = canPublishAdditionalModelData();
            boolean modelNotTooLarge = bestModelPathFS.getLen() <= maxMessageSize;
            if (modelNeededForUpdates || modelNotTooLarge) {
                // Either the model is required for publishAdditionalModelData, or required because it's going to
                // be serialized to Kafka
                try (InputStream in = fs.open(bestModelPath)) {
                    bestModel = PMMLUtils.read(in);
                }
            }

            if (modelNotTooLarge) {
                modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
            } else {
                modelUpdateTopic.send("MODEL-REF", fs.makeQualified(bestModelPath).toString());
            }

            if (modelNeededForUpdates) {
                publishAdditionalModelData(sparkContext, bestModel, newData, pastData, finalPath,
                        modelUpdateTopic);
            }
        }
    }

    if (newData != null) {
        newData.unpersist();
    }
    if (pastData != null) {
        pastData.unpersist();
    }
}

From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        System.exit(1);
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path outputPath = new Path(output);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}