Usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
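Before the project examples below, here is a minimal, self-contained sketch of the call itself (the paths are hypothetical, chosen only to illustrate the two recursive modes): with recursive = true the method removes a directory and everything beneath it; with recursive = false it only applies to files and empty directories, and deleting a non-empty directory throws an IOException. In either mode it returns false if the path did not exist.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path dir = new Path("/tmp/example-output"); // hypothetical path

        // recursive = true: removes the directory and everything beneath it.
        // Returns false when the path did not exist.
        boolean deleted = fs.delete(dir, true);

        // recursive = false: only valid for files and empty directories;
        // a non-empty directory would cause an IOException here.
        Path file = new Path("/tmp/example-output/part-00000"); // hypothetical path
        if (fs.exists(file)) {
            fs.delete(file, false);
        }

        System.out.println("directory deleted: " + deleted);
    }
}

Note that most of the examples below check the boolean return value and treat false as a failure when the path is known to exist.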
From source file:co.cask.cdap.data.hbase.HBase94Test.java
License:Apache License
@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
        htd.addFamily(new HColumnDescriptor(family));
    }
    HRegionInfo info = new HRegionInfo(htd.getName(), startKey, stopKey, false);
    Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        if (!fs.delete(path, true)) {
            throw new IOException("Failed delete of " + path);
        }
    }
    return HRegion.createHRegion(info, path, conf, htd);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void cleanupJob(JobContext context) throws IOException {
    FileSystem fs = jobSpecificOutputPath.getFileSystem(context.getConfiguration());
    fs.delete(jobSpecificOutputPath, true);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the FileSystem to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }
        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                // it is a directory, so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // it does not exist, just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions") @Override//w ww. j a va 2 s . c o m public void run(BatchActionContext context) throws Exception { if (!config.shouldRun(context)) { return; } config.substituteMacros(context); Job job = JobUtils.createInstance(); Configuration conf = job.getConfiguration(); FileSystem fileSystem = FileSystem.get(conf); Path[] paths; Path sourcePath = new Path(config.path); if (fileSystem.isDirectory(sourcePath)) { FileStatus[] status = fileSystem.listStatus(sourcePath); paths = FileUtil.stat2Paths(status); } else { paths = new Path[] { sourcePath }; } //get regex pattern for file name filtering. boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern); if (patternSpecified) { regex = Pattern.compile(config.pattern); } switch (config.action.toLowerCase()) { case "delete": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { fileSystem.delete(path, true); } } break; case "move": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { Path targetFileMovePath = new Path(config.targetFolder, path.getName()); fileSystem.rename(path, targetFileMovePath); } } break; case "archive": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { try (FSDataOutputStream archivedStream = fileSystem .create(new Path(config.targetFolder, path.getName() + ".zip")); ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream); FSDataInputStream fdDataInputStream = fileSystem.open(path)) { zipArchivedStream.putNextEntry(new ZipEntry(path.getName())); int length; byte[] buffer = new byte[1024]; while ((length = fdDataInputStream.read(buffer)) > 0) { zipArchivedStream.write(buffer, 0, length); } zipArchivedStream.closeEntry(); } fileSystem.delete(path, true); } } break; default: LOG.warn("No action required on the file."); break; } }
From source file:colossal.pipe.ColFile.java
License:Apache License
public void clearAndPrepareOutput(Configuration conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (status.isDir()) {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        throw new IllegalArgumentException(
                                "Trying to overwrite directory with child directories: " + path);
                    }
                }
            }
        } else {
            fs.mkdirs(dfsPath);
        }
        // remove the output path (and any _logs/_temporary children) so the job starts clean
        fs.delete(dfsPath, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.alexholmes.hadooputils.sort.SortInputSampler.java
License:Apache License
public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException { Configuration conf = job;/* w w w . j av a 2 s . com*/ // Use the input format defined in the job. NOT, the one provided by // the parent class's writePartitionFile() method, which will be a plain // TextInputFormat, by default final InputFormat inf = job.getInputFormat(); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[]) sampler.getSample(inf, job); RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job)); FileSystem fs = dst.getFileSystem(conf); if (fs.exists(dst)) { fs.delete(dst, false); } SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; int last = -1; for (int i = 1; i < numPartitions; ++i) { int k = Math.round(stepSize * i); while (last >= k && comparator.compare(samples[last], samples[k]) == 0) { ++k; } writer.append(samples[k], nullValue); last = k; } writer.close(); }
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {
    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);
            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }
            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        //
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }
}
From source file:com.alibaba.jstorm.hdfs.spout.DirLock.java
License:Apache License
private static DirLock takeOwnership(FileSystem fs, Path dirLockFile) throws IOException {
    if (fs instanceof DistributedFileSystem) {
        if (!((DistributedFileSystem) fs).recoverLease(dirLockFile)) {
            LOG.warn("Unable to recover lease on dir lock file " + dirLockFile
                    + " right now. Cannot transfer ownership. Will need to try later.");
            return null;
        }
    }

    // delete and recreate lock file
    if (fs.delete(dirLockFile, false)) { // returns false if somebody else already deleted it (to take ownership)
        FSDataOutputStream ostream = HdfsUtils.tryCreateFile(fs, dirLockFile);
        if (ostream != null) {
            ostream.close();
        }
        return new DirLock(fs, dirLockFile);
    }
    return null;
}
From source file:com.alibaba.jstorm.hdfs.spout.FileLock.java
License:Apache License
/**
 * Checks if lockFile is older than 'olderThan' UTC time by examining the modification time
 * on the file and (if necessary) the timestamp in the last log entry in the file. If it's
 * stale, returns the last log entry, else returns null.
 * @param fs
 * @param lockFile
 * @param olderThan time (millis) in UTC.
 * @return the last entry in the file if it's too old. null if the last entry is not too old
 * @throws IOException
 */
public static LogEntry getLastEntryIfStale(FileSystem fs, Path lockFile, long olderThan) throws IOException {
    long modifiedTime = fs.getFileStatus(lockFile).getModificationTime();
    if (modifiedTime <= olderThan) {
        // Impt: HDFS timestamp may not reflect recent appends, so we double check the
        // timestamp in the last line of the file to see when the last update was made
        LogEntry lastEntry = getLastEntry(fs, lockFile);
        if (lastEntry == null) {
            LOG.warn("Empty lock file found. Deleting it. {}", lockFile);
            try {
                if (!fs.delete(lockFile, false)) {
                    throw new IOException("Empty lock file deletion failed");
                }
            } catch (Exception e) {
                LOG.error("Unable to delete empty lock file " + lockFile, e);
            }
            // nothing to return for an empty lock file; as originally written, the code
            // fell through and dereferenced lastEntry below, throwing a NullPointerException
            return null;
        }
        if (lastEntry.eventTime <= olderThan) {
            return lastEntry;
        }
    }
    return null;
}
From source file:com.anhth12.lambda.ml.MLUpdate.java
@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<String, M> newKeyMessageData,
        JavaPairRDD<String, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {

    Preconditions.checkNotNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        newData.foreachPartition(Functions.<Iterator<M>>noOp());
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(Functions.<Iterator<M>>noOp());
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParamValues();
    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);
    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());
    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candidatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));
    fs.mkdirs(candidatesPath);

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candidatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        fs.rename(bestCandidatePath, finalPath);
    }

    // remove the temporary candidate models now that the best one has been promoted
    fs.delete(candidatesPath, true);

    Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);
    if (fs.exists(bestModelPath)) {
        PMML bestModel;
        // open the model file itself; the original opened finalPath, a directory, which would fail
        try (InputStream in = new GZIPInputStream(fs.open(bestModelPath), 1 << 16)) {
            bestModel = PMMLUtils.read(in);
        }
        modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
        publishAdditionalModelData(sparkContext, bestModel, newData, pastData, candidatesPath, modelUpdateTopic);
    }

    if (newData != null) {
        newData.unpersist();
    }
    if (pastData != null) {
        pastData.unpersist();
    }
}