Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem.delete, collected from open-source projects.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;


Document

Delete a file. If f refers to a directory and recursive is true, the directory and all of its contents are deleted; if recursive is false and the directory is not empty, an IOException is thrown. For a regular file the recursive flag may be either value. The method returns true if the delete succeeded and false otherwise (for example, when the path does not exist).
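
Before the project examples below, here is a minimal, self-contained sketch of the two common call patterns. The /tmp/example paths and the default Configuration are illustrative assumptions only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example/data.txt"); // hypothetical file
        Path dir = new Path("/tmp/example");           // hypothetical directory

        FileSystem fs = file.getFileSystem(conf);

        // recursive=false: deletes a single file (or an empty directory)
        boolean fileDeleted = fs.delete(file, false);

        // recursive=true: deletes a directory together with all of its contents
        boolean dirDeleted = fs.delete(dir, true);

        // delete returns false rather than throwing when the path does not exist,
        // so callers usually check the return value.
        System.out.println("file deleted: " + fileDeleted + ", dir deleted: " + dirDeleted);
    }
}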

Usage

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

public boolean cleanMarkerFile(String hiveMarkerPath) {
    LOG.debug("cleaning up hiveMarker: " + hiveMarkerPath);
    FileSystem localHdfs;
    Path deletePath = new Path(hiveMarkerPath);
    try {
        localHdfs = deletePath.getFileSystem(conf);
        if (localHdfs.delete(deletePath, false)) {
            LOG.debug("hiveMarker deleted successfully: " + hiveMarkerPath);
            return true;
        } else {
            LOG.error("error deleting hive marker: " + hiveMarkerPath);
        }
    } catch (IOException e) {
        LOG.error("Error deleting hiveMarker: " + e.getMessage());
    }
    return false;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);

        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.error("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));

            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);

            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }

        hdfs.close();
        LOG.error("Written file: " + hiveOutputLocation);

        //lets start the purge process, delete all files except the merged file
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.error("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }

    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
    }
    LOG.error("mergeFiles Done merging files");
    return false;
}

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.REMOVEHandler.java

License:Apache License

@Override
protected REMOVEResponse doHandle(NFS4Handler server, Session session, REMOVERequest request)
        throws NFS4Exception, IOException {
    if (session.getCurrentFileHandle() == null) {
        throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
    }
    if ("".equals(request.getName())) {
        throw new NFS4Exception(NFS4ERR_INVAL);
    }
    Path parentPath = server.getPath(session.getCurrentFileHandle());
    Path path = new Path(parentPath, request.getName());
    FileSystem fs = session.getFileSystem();
    if (!fs.exists(path)) {
        throw new NFS4Exception(NFS4ERR_NOENT);
    }
    REMOVEResponse response = createResponse();
    ChangeInfo changeInfo = new ChangeInfo();
    FileStatus parentStatus = fs.getFileStatus(parentPath);
    ChangeID changeIDBefore = new ChangeID();
    changeIDBefore.setChangeID(parentStatus.getModificationTime());
    changeInfo.setChangeIDBefore(changeIDBefore);

    fs.delete(path, false);

    parentStatus = fs.getFileStatus(parentPath);
    ChangeID changeIDAfter = new ChangeID();
    changeIDAfter.setChangeID(parentStatus.getModificationTime());
    changeInfo.setChangeIDAfter(changeIDAfter);
    changeInfo.setAtomic(true);
    response.setChangeInfo(changeInfo);
    response.setStatus(NFS4_OK);
    return response;
}

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.state.HDFSState.java

License:Apache License

/**
 * Deletes a file from fs. If the file is open for writing,
 * the file will not be deleted.
 * @param fs the file system to delete from
 * @param path the path to delete
 * @return true if the file was deleted; false if it is open for writing or the underlying delete failed
 * @throws IOException
 */
public synchronized boolean delete(FileSystem fs, Path path) throws IOException {
    FileHandle fileHandle = mFileHandleINodeMap.getFileHandleByPath(realPath(path));
    HDFSFile hdfsFile = mOpenFilesMap.get(fileHandle);
    if ((hdfsFile != null) && hdfsFile.isOpenForWrite()) {
        return false;
    }
    return fs.delete(path, false);
}

From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java

License:Open Source License

private void testDelete() throws Exception {
    Path foo = new Path(getHadoopTestDir(), "foo");
    Path bar = new Path(getHadoopTestDir(), "bar");
    Path foe = new Path(getHadoopTestDir(), "foe");
    FileSystem fs = FileSystem.get(getHadoopConf());
    fs.mkdirs(foo);
    fs.mkdirs(new Path(bar, "a"));
    fs.mkdirs(foe);

    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    FileSystem hoopFs = FileSystem.get(getJettyURL().toURI(), conf);
    Assert.assertTrue(hoopFs.delete(new Path(foo.toUri().getPath()), false));
    Assert.assertFalse(fs.exists(foo));
    try {
        hoopFs.delete(new Path(bar.toUri().getPath()), false);
        Assert.fail();
    } catch (IOException ex) {
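        // expected: bar is a non-empty directory, so a non-recursive delete must fail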
    } catch (Exception ex) {
        Assert.fail();
    }
    Assert.assertTrue(fs.exists(bar));
    Assert.assertTrue(hoopFs.delete(new Path(bar.toUri().getPath()), true));
    Assert.assertFalse(fs.exists(bar));

    Assert.assertTrue(fs.exists(foe));
    Assert.assertTrue(hoopFs.delete(foe));
    Assert.assertFalse(fs.exists(foe));

    hoopFs.close();
    fs.close();
}

From source file:com.cloudera.hoop.fs.FSDelete.java

License:Open Source License

/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return <code>true</code> if the delete operation was successful,
 * <code>false</code> otherwise.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public JSONObject execute(FileSystem fs) throws IOException {
    boolean deleted = fs.delete(path, recursive);
    return FSUtils.toJSON("delete", deleted);
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Performs a non-recursive delete of all visible (non-hidden) files in a given
 * directory. Returns the number of files deleted as part of this operation.
 */
public static int deleteAllVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFilesDeleted = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        // Only delete files that are not hidden.
        if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
            LOG.debug("Removing: " + fStatus.getPath());
            fs.delete(fStatus.getPath(), false);
            ++numFilesDeleted;
        }
    }
    return numFilesDeleted;
}

From source file:com.cloudera.oryx.lambda.DeleteOldDataFn.java

License:Open Source License

@Override
public void call(T ignored) throws IOException {
    Path dataDirPath = new Path(dataDirString + "/*");
    FileSystem fs = FileSystem.get(dataDirPath.toUri(), hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(dataDirPath);
    if (inputPathStatuses != null) {
        long oldestTimeAllowed = System.currentTimeMillis()
                - TimeUnit.MILLISECONDS.convert(maxAgeHours, TimeUnit.HOURS);
        Arrays.stream(inputPathStatuses).filter(FileStatus::isDirectory).map(FileStatus::getPath)
                .filter(subdir -> {
                    Matcher m = dirTimestampPattern.matcher(subdir.getName());
                    return m.find() && Long.parseLong(m.group(1)) < oldestTimeAllowed;
                }).forEach(subdir -> {
                    log.info("Deleting old data at {}", subdir);
                    try {
                        fs.delete(subdir, true);
                    } catch (IOException e) {
                        log.warn("Unable to delete {}; continuing", subdir, e);
                    }
                });
    }
}

From source file:com.cloudera.oryx.ml.MLUpdate.java

License:Open Source License

@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<Object, M> newKeyMessageData,
        JavaPairRDD<Object, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {

    Objects.requireNonNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        // This forces caching of the RDD. This shouldn't be necessary but we see some freezes
        // when many workers try to materialize the RDDs at once. Hence the workaround.
        newData.foreachPartition(p -> {
        });
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(p -> {
        });
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParameterValues();
    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);
    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candidatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));

    FileSystem fs = FileSystem.get(modelDir.toUri(), sparkContext.hadoopConfiguration());
    fs.mkdirs(candidatesPath);

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candidatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        // Move best model into place
        fs.rename(bestCandidatePath, finalPath);
    }
    // Then delete everything else
    fs.delete(candidatesPath, true);

    if (modelUpdateTopic == null) {
        log.info("No update topic configured, not publishing models to a topic");
    } else {
        // Push PMML model onto update topic, if it exists
        Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);
        if (fs.exists(bestModelPath)) {
            FileStatus bestModelPathFS = fs.getFileStatus(bestModelPath);
            PMML bestModel = null;
            boolean modelNeededForUpdates = canPublishAdditionalModelData();
            boolean modelNotTooLarge = bestModelPathFS.getLen() <= maxMessageSize;
            if (modelNeededForUpdates || modelNotTooLarge) {
                // Either the model is required for publishAdditionalModelData, or required because it's going to
                // be serialized to Kafka
                try (InputStream in = fs.open(bestModelPath)) {
                    bestModel = PMMLUtils.read(in);
                }
            }

            if (modelNotTooLarge) {
                modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
            } else {
                modelUpdateTopic.send("MODEL-REF", fs.makeQualified(bestModelPath).toString());
            }

            if (modelNeededForUpdates) {
                publishAdditionalModelData(sparkContext, bestModel, newData, pastData, finalPath,
                        modelUpdateTopic);
            }
        }
    }

    if (newData != null) {
        newData.unpersist();
    }
    if (pastData != null) {
        pastData.unpersist();
    }
}

From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        System.exit(1);
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path outputPath = new Path(output);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}