List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
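Before the project examples below, here is a minimal sketch of calling delete directly. It assumes a default Configuration and a placeholder path /tmp/example-output; both are illustrative and not taken from any of the source files listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical target path used only for this sketch
        Path target = new Path("/tmp/example-output");

        // Pass recursive=true to remove a non-empty directory;
        // the call returns true only if the path was actually deleted.
        if (fs.exists(target)) {
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted " + target + ": " + deleted);
        }
    }
}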
From source file:com.redsqirl.workflow.server.OozieManager.java
License:Open Source License
/**
 * Clean the directory where the Job details are stored
 *
 * @param nameWf
 * @throws RemoteException
 */
public void cleanJobDirectory(final String nameWf) throws RemoteException {
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int numberToKeep = WorkflowPrefManager.getNbOozieDirToKeep();
    try {
        fs = NameNodeVar.getFS();
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {
            @Override
            public boolean accept(Path arg0) {
                return arg0.getName().startsWith(nameWf + "_");
            }
        });
        Arrays.sort(children, 0, children.length, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus arg0, FileStatus arg1) {
                return (int) ((arg0.getModificationTime() - arg1.getModificationTime()) / 10000);
            }
        });
        for (int i = 0; i < children.length - numberToKeep; ++i) {
            fs.delete(children[i].getPath(), true);
        }
    } catch (Exception e1) {
        logger.error(e1);
    }
}
From source file:com.redsqirl.workflow.server.Workflow.java
License:Open Source License
/**
 * Clean the backup directory
 *
 * @throws IOException
 */
public void cleanUpBackup() throws IOException {
    String path = WorkflowPrefManager.getBackupPath();
    int nbBackup = WorkflowPrefManager.getNbBackup();
    FileSystem fs = NameNodeVar.getFS();
    // FileStatus stat = fs.getFileStatus(new Path(path));
    FileStatus[] fsA = fs.listStatus(new Path(path), new PathFilter() {
        @Override
        public boolean accept(Path arg0) {
            return arg0.getName().matches(".*[0-9]{14}(.rs|.srs)$");
        }
    });
    logger.debug("Backup directory: " + fsA.length + " files, " + nbBackup + " to keep, "
            + Math.max(0, fsA.length - nbBackup) + " to remove");
    if (fsA.length > nbBackup) {
        int numberToRemove = fsA.length - nbBackup;
        Map<Path, Long> pathToRemove = new HashMap<Path, Long>();
        Path pathMin = null;
        Long min = Long.MAX_VALUE;
        for (FileStatus stat : fsA) {
            if (pathToRemove.size() < numberToRemove) {
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            } else if (min > stat.getModificationTime()) {
                pathToRemove.remove(pathMin);
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            }
            if (min > stat.getModificationTime()) {
                min = stat.getModificationTime();
                pathMin = stat.getPath();
            }
        }
        for (Path pathDel : pathToRemove.keySet()) {
            fs.delete(pathDel, false);
        }
    }
    // fs.close();
}
From source file:com.redsqirl.workflow.server.Workflow.java
License:Open Source License
/**
 * Backup the workflow
 *
 * @throws RemoteException
 */
public String backup() throws RemoteException {
    String path = getBackupName(createBackupDir());
    boolean save_swp = isSaved();
    logger.debug("back up path " + path);
    String error = save(path);
    saved = save_swp;
    try {
        if (error != null) {
            logger.warn("Fail to back up: " + error);
            FileSystem fs = NameNodeVar.getFS();
            fs.delete(new Path(path), false);
        }
        logger.debug("Clean up back up");
        cleanUpBackup();
    } catch (Exception e) {
        logger.warn(e.getMessage());
        logger.warn("Failed cleaning up backup directory");
    }
    return path;
}
From source file:com.revolutionanalytics.hadoop.hdfs.FileUtils.java
License:Apache License
private static void delete(Configuration cfg, FileSystem srcFS, Path src, boolean recursive)
        throws IOException {
    Trash trashTmp = new Trash(srcFS, cfg);
    if (trashTmp.moveToTrash(src)) {
        System.out.println("Moved to trash: " + src);
        return;
    }
    if (srcFS.delete(src, true)) {
        System.out.println("Deleted " + src);
    } else {
        if (!srcFS.exists(src)) {
            throw new FileNotFoundException("cannot remove " + src + ": No such file or directory.");
        }
        throw new IOException("Delete failed " + src);
    }
}
From source file:com.ricemap.spateDB.operations.FileMBR.java
License:Apache License
/**
 * Computes the minimal bounding Prism (MBR) of a file, either from its global
 * index or by issuing a MapReduce job.
 * @param fs
 * @param file
 * @param stockShape
 * @param background
 * @return
 * @throws IOException
 */
public static <S extends Shape> Prism fileMBRMapReduce(FileSystem fs, Path file, S stockShape,
        boolean background) throws IOException {
    // Quickly get file MBR if it is globally indexed
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(fs, file);
    if (globalIndex != null) {
        // Return the MBR of the global index.
        // Compute file size by adding up sizes of all files assuming they are
        // not compressed
        long totalLength = 0;
        for (Partition p : globalIndex) {
            Path filePath = new Path(file, p.filename);
            if (fs.exists(filePath))
                totalLength += fs.getFileStatus(filePath).getLen();
        }
        sizeOfLastProcessedFile = totalLength;
        return globalIndex.getMBR();
    }

    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(file.toUri().getPath() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Prism.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeInputFormat.class);
    SpatialSite.setShapeClass(job, stockShape.getClass());
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        FileMBR.sizeOfLastProcessedFile = inputBytesCounter.getValue();

        // Read job result
        FileStatus[] results = outFs.listStatus(outputPath);
        Prism mbr = new Prism();
        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                Text text = new Text();
                if (lineReader.readLine(text) > 0) {
                    mbr.fromText(text);
                }
                lineReader.close();
            }
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}
From source file:com.ricemap.spateDB.operations.LineRandomizer.java
License:Apache License
/**
 * Randomizes the order of lines in an input file by issuing a MapReduce job.
 * @param infile
 * @param outfile
 * @param overwrite
 * @throws IOException
 */
public static void randomizerMapReduce(Path infile, Path outfile, boolean overwrite) throws IOException {
    JobConf job = new JobConf(LineRandomizer.class);

    FileSystem outfs = outfile.getFileSystem(job);
    if (overwrite)
        outfs.delete(outfile, true);

    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    FileSystem infs = infile.getFileSystem(job);
    int numOfPartitions = (int) Math
            .ceil((double) infs.getFileStatus(infile).getLen() / infs.getDefaultBlockSize(outfile));
    job.setInt(NumOfPartitions, numOfPartitions);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}
From source file:com.ricemap.spateDB.operations.Plot.java
License:Apache License
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    System.setProperty("java.awt.headless", "true");
    CommandLineArguments cla = new CommandLineArguments(args);
    JobConf conf = new JobConf(Plot.class);
    Path[] files = cla.getPaths();
    if (files.length < 2) {
        printUsage();
        throw new RuntimeException("Illegal arguments. File names missing");
    }

    Path inFile = files[0];
    FileSystem inFs = inFile.getFileSystem(conf);
    if (!inFs.exists(inFile)) {
        printUsage();
        throw new RuntimeException("Input file does not exist");
    }

    boolean overwrite = cla.isOverwrite();
    Path outFile = files[1];
    FileSystem outFs = outFile.getFileSystem(conf);
    if (outFs.exists(outFile)) {
        if (overwrite)
            outFs.delete(outFile, true);
        else
            throw new RuntimeException("Output file exists and overwrite flag is not set");
    }

    boolean showBorders = cla.is("borders");
    boolean showBlockCount = cla.is("showblockcount");
    boolean showRecordCount = cla.is("showrecordcount");

    Shape shape = cla.getShape(true);

    int width = cla.getWidth(1000);
    int height = cla.getHeight(1000);

    Color color = cla.getColor();

    plot(inFile, outFile, shape, width, height, color, showBorders, showBlockCount, showRecordCount);

    System.out.println("Values range: [" + min_value + "," + max_value + "]");
}
From source file:com.ricemap.spateDB.operations.RangeQuery.java
License:Apache License
/**
 * Performs a range query using MapReduce
 *
 * @param fs
 * @param inputFile
 * @param userOutputPath
 * @param queryShape
 * @param shape
 * @param overwrite
 * @param background
 * @param query
 * @return
 * @throws IOException
 */
public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape,
        Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(
                    inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    } else {
        if (outFs.exists(outputPath)) {
            if (overwrite) {
                outFs.delete(outputPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
        // RTree indexed file
        LOG.info("Searching an RTree indexed file");
        job.setInputFormat(RTreeInputFormat.class);
    } else {
        // A file with no local index
        LOG.info("Searching a non local-indexed file");
        job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> OutputKey = NullWritable.class;
    try {
        Class<?> c = shape.getClass();
        Field f = c.getDeclaredField(query.field);
        f.setAccessible(true);
        if (f.getType().equals(Integer.TYPE)) {
            OutputKey = IntWritable.class;
        } else if (f.getType().equals(Double.TYPE)) {
            OutputKey = DoubleWritable.class;
        } else if (f.getType().equals(Long.TYPE)) {
            OutputKey = LongWritable.class;
        }
    } catch (SecurityException e) {
        e.printStackTrace();
    } catch (NoSuchFieldException e) {
        e.printStackTrace();
    }

    job.setMapOutputKeyClass(OutputKey);

    switch (query.type) {
    case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
    case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
    default:
        break;
    }
    // } else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // If outputPath not set by user, automatically delete it
        if (userOutputPath == null)
            outFs.delete(outputPath, true);

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        lastRunningJob = jc.submitJob(job);
        return -1;
    }
}
From source file:com.ricemap.spateDB.operations.RecordCount.java
License:Apache License
/**
 * Counts the exact number of lines in a file by issuing a MapReduce job.
 * @param fs
 * @param file
 * @return
 * @throws IOException
 */
public static long recordCountMapReduce(FileSystem fs, Path file) throws IOException {
    JobConf job = new JobConf(RecordCount.class);

    Path outputPath = new Path(file.toUri().getPath() + ".linecount");
    FileSystem outFs = outputPath.getFileSystem(job);
    outFs.delete(outputPath, true);

    job.setJobName("LineCount");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(1);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    JobClient.runJob(job);

    // Read job result
    long lineCount = 0;
    FileStatus[] results = outFs.listStatus(outputPath);
    for (FileStatus fileStatus : results) {
        if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
            LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
            Text text = new Text();
            if (lineReader.readLine(text) > 0) {
                lineCount = Long.parseLong(text.toString());
            }
            lineReader.close();
        }
    }

    outFs.delete(outputPath, true);

    return lineCount;
}
From source file:com.ricemap.spateDB.operations.Repartition.java
License:Apache License
/**
 * Repartitions an input file according to the given list of cells.
 * @param inFile
 * @param outPath
 * @param stockShape
 * @param blockSize
 * @param cellInfos
 * @param sindex
 * @param overwrite
 * @param columnar
 * @throws IOException
 */
public static void repartitionMapReduce(Path inFile, Path outPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite, boolean columnar) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("Repartition");
    FileSystem outFs = outPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    job.setBoolean(SpatialSite.PACK_CELLS, pack);
    job.setBoolean(SpatialSite.EXPAND_CELLS, expand);
    job.setStrings(SpatialSite.STORAGE_MODE, columnar ? "columnar" : "normal");

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    // Set default parameters for reading input file
    SpatialSite.setShapeClass(job, stockShape.getClass());

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    // Copy block size from source file if it's globally indexed
    FileSystem inFs = inFile.getFileSystem(job);
    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inFile);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(inFile, globalIndex.iterator().next().filename))
                    .getBlockSize();
            LOG.info("Automatically setting block size to " + blockSize);
        }
    }
    if (blockSize != 0)
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blockSize);

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}