Usage examples for org.apache.hadoop.fs.FileSystem#delete(Path, boolean)
public abstract boolean delete(Path f, boolean recursive) throws IOException;
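Before the project examples below, here is a minimal, self-contained sketch of the call itself. The paths and class name are hypothetical, chosen only to illustrate the two forms of the recursive flag.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Minimal sketch, assuming a default-configured filesystem and hypothetical paths.
public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example/data.txt");   // hypothetical file
        Path dir = new Path("/tmp/example/old-output");  // hypothetical directory
        FileSystem fs = file.getFileSystem(conf);

        // recursive = false: deletes a file or an empty directory;
        // throws IOException if the path is a non-empty directory.
        boolean fileDeleted = fs.delete(file, false);

        // recursive = true: deletes the directory and everything beneath it.
        boolean dirDeleted = fs.delete(dir, true);

        // delete() returns false if the path did not exist.
        System.out.println("file deleted: " + fileDeleted + ", dir deleted: " + dirDeleted);
    }
}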
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
public boolean cleanMarkerFile(String hiveMarkerPath) {
    LOG.debug("cleaning up hiveMarker: " + hiveMarkerPath);
    Path deletePath = new Path(hiveMarkerPath);
    try {
        FileSystem localHdfs = deletePath.getFileSystem(conf);
        // Non-recursive delete: the marker is a single file.
        if (localHdfs.delete(deletePath, false)) {
            LOG.debug("hiveMarker deleted successfully: " + hiveMarkerPath);
            return true;
        } else {
            LOG.error("error deleting hive marker: " + hiveMarkerPath);
        }
    } catch (IOException e) {
        LOG.error("Error deleting hiveMarker: " + e.getMessage());
    }
    return false;
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.info("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));
            // Write the primary file first, then append every sibling file in the folder.
            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles file marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }
        hdfs.close();
        LOG.info("Written file: " + hiveOutputLocation);
        // Purge: delete every input file, keeping only the merged output.
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.info("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }
    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries: " + e.getMessage());
    }
    LOG.info("mergeFiles done merging files");
    return false;
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.REMOVEHandler.java
License:Apache License
@Override
protected REMOVEResponse doHandle(NFS4Handler server, Session session, REMOVERequest request)
        throws NFS4Exception, IOException {
    if (session.getCurrentFileHandle() == null) {
        throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
    }
    if ("".equals(request.getName())) {
        throw new NFS4Exception(NFS4ERR_INVAL);
    }
    Path parentPath = server.getPath(session.getCurrentFileHandle());
    Path path = new Path(parentPath, request.getName());
    FileSystem fs = session.getFileSystem();
    if (!fs.exists(path)) {
        throw new NFS4Exception(NFS4ERR_NOENT);
    }
    REMOVEResponse response = createResponse();
    ChangeInfo changeInfo = new ChangeInfo();
    // Record the parent directory's modification time before and after the
    // non-recursive delete so the NFS client can detect concurrent changes.
    FileStatus parentStatus = fs.getFileStatus(parentPath);
    ChangeID changeIDBefore = new ChangeID();
    changeIDBefore.setChangeID(parentStatus.getModificationTime());
    changeInfo.setChangeIDBefore(changeIDBefore);
    fs.delete(path, false);
    parentStatus = fs.getFileStatus(parentPath);
    ChangeID changeIDAfter = new ChangeID();
    changeIDAfter.setChangeID(parentStatus.getModificationTime());
    changeInfo.setChangeIDAfter(changeIDAfter);
    changeInfo.setAtomic(true);
    response.setChangeInfo(changeInfo);
    response.setStatus(NFS4_OK);
    return response;
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.state.HDFSState.java
License:Apache License
/**
 * Deletes a file from fs. If the file is open for writing,
 * the file will not be deleted.
 * @param fs the filesystem to delete from
 * @param path the path to delete
 * @return true if the delete succeeded, false if the file is open for writing
 * @throws IOException
 */
public synchronized boolean delete(FileSystem fs, Path path) throws IOException {
    FileHandle fileHandle = mFileHandleINodeMap.getFileHandleByPath(realPath(path));
    HDFSFile hdfsFile = mOpenFilesMap.get(fileHandle);
    if ((hdfsFile != null) && hdfsFile.isOpenForWrite()) {
        return false;
    }
    return fs.delete(path, false);
}
From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java
License:Open Source License
private void testDelete() throws Exception {
    Path foo = new Path(getHadoopTestDir(), "foo");
    Path bar = new Path(getHadoopTestDir(), "bar");
    Path foe = new Path(getHadoopTestDir(), "foe");
    FileSystem fs = FileSystem.get(getHadoopConf());
    fs.mkdirs(foo);
    fs.mkdirs(new Path(bar, "a"));
    fs.mkdirs(foe);

    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    FileSystem hoopFs = FileSystem.get(getJettyURL().toURI(), conf);

    Assert.assertTrue(hoopFs.delete(new Path(foo.toUri().getPath()), false));
    Assert.assertFalse(fs.exists(foo));

    try {
        hoopFs.delete(new Path(bar.toUri().getPath()), false);
        Assert.fail();
    } catch (IOException ex) {
        // Expected: a non-recursive delete of a non-empty directory must fail.
    } catch (Exception ex) {
        Assert.fail();
    }
    Assert.assertTrue(fs.exists(bar));

    Assert.assertTrue(hoopFs.delete(new Path(bar.toUri().getPath()), true));
    Assert.assertFalse(fs.exists(bar));

    Assert.assertTrue(fs.exists(foe));
    Assert.assertTrue(hoopFs.delete(foe));
    Assert.assertFalse(fs.exists(foe));

    hoopFs.close();
    fs.close();
}
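Note that the final assertion uses the single-argument hoopFs.delete(foe) overload; delete(Path) is deprecated in current Hadoop releases in favor of the explicit delete(Path, boolean) form shown elsewhere on this page.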
From source file:com.cloudera.hoop.fs.FSDelete.java
License:Open Source License
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return <code>true</code> if the delete operation was successful,
 * <code>false</code> otherwise.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public JSONObject execute(FileSystem fs) throws IOException {
    boolean deleted = fs.delete(path, recursive);
    return FSUtils.toJSON("delete", deleted);
}
From source file:com.cloudera.impala.common.FileSystemUtil.java
License:Apache License
/**
 * Performs a non-recursive delete of all visible (non-hidden) files in a given
 * directory. Returns the number of files deleted as part of this operation.
 */
public static int deleteAllVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFilesDeleted = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        // Only delete files that are not hidden.
        if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
            LOG.debug("Removing: " + fStatus.getPath());
            fs.delete(fStatus.getPath(), false);
            ++numFilesDeleted;
        }
    }
    return numFilesDeleted;
}
From source file:com.cloudera.oryx.lambda.DeleteOldDataFn.java
License:Open Source License
@Override
public void call(T ignored) throws IOException {
    Path dataDirPath = new Path(dataDirString + "/*");
    FileSystem fs = FileSystem.get(dataDirPath.toUri(), hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(dataDirPath);
    if (inputPathStatuses != null) {
        long oldestTimeAllowed = System.currentTimeMillis()
                - TimeUnit.MILLISECONDS.convert(maxAgeHours, TimeUnit.HOURS);
        Arrays.stream(inputPathStatuses)
                .filter(FileStatus::isDirectory)
                .map(FileStatus::getPath)
                .filter(subdir -> {
                    Matcher m = dirTimestampPattern.matcher(subdir.getName());
                    return m.find() && Long.parseLong(m.group(1)) < oldestTimeAllowed;
                })
                .forEach(subdir -> {
                    log.info("Deleting old data at {}", subdir);
                    try {
                        fs.delete(subdir, true);
                    } catch (IOException e) {
                        log.warn("Unable to delete {}; continuing", subdir, e);
                    }
                });
    }
}
From source file:com.cloudera.oryx.ml.MLUpdate.java
License:Open Source License
@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp,
        JavaPairRDD<Object, M> newKeyMessageData, JavaPairRDD<Object, M> pastKeyMessageData,
        String modelDirString, TopicProducer<String, String> modelUpdateTopic)
        throws IOException, InterruptedException {
    Objects.requireNonNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        // This forces caching of the RDD. This shouldn't be necessary but we see some freezes
        // when many workers try to materialize the RDDs at once. Hence the workaround.
        newData.foreachPartition(p -> {
        });
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(p -> {
        });
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParameterValues();
    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);
    List<List<?>> hyperParameterCombos =
            HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates, valuesPerHyperParam);

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candidatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));

    FileSystem fs = FileSystem.get(modelDir.toUri(), sparkContext.hadoopConfiguration());
    fs.mkdirs(candidatesPath);

    Path bestCandidatePath =
            findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos, candidatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        // Move best model into place
        fs.rename(bestCandidatePath, finalPath);
    }
    // Then delete everything else
    fs.delete(candidatesPath, true);

    if (modelUpdateTopic == null) {
        log.info("No update topic configured, not publishing models to a topic");
    } else {
        // Push PMML model onto update topic, if it exists
        Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);
        if (fs.exists(bestModelPath)) {
            FileStatus bestModelPathFS = fs.getFileStatus(bestModelPath);
            PMML bestModel = null;
            boolean modelNeededForUpdates = canPublishAdditionalModelData();
            boolean modelNotTooLarge = bestModelPathFS.getLen() <= maxMessageSize;
            if (modelNeededForUpdates || modelNotTooLarge) {
                // Either the model is required for publishAdditionalModelData, or required because
                // it's going to be serialized to Kafka
                try (InputStream in = fs.open(bestModelPath)) {
                    bestModel = PMMLUtils.read(in);
                }
            }
            if (modelNotTooLarge) {
                modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
            } else {
                modelUpdateTopic.send("MODEL-REF", fs.makeQualified(bestModelPath).toString());
            }
            if (modelNeededForUpdates) {
                publishAdditionalModelData(sparkContext, bestModel, newData, pastData, finalPath,
                        modelUpdateTopic);
            }
        }
    }

    if (newData != null) {
        newData.unpersist();
    }
    if (pastData != null) {
        pastData.unpersist();
    }
}
From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        System.exit(1);
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path outputPath = new Path(output);
    // Recursively delete any previous output so the job can write to the same path.
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}