List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
From source file:com.ricemap.spateDB.operations.Repartition.java
License:Apache License
/** * Repartitions a file on the local machine without MapReduce jobs. * @param inFs * @param in * @param outFs * @param out * @param cells * @param stockShape * @param rtree * @param overwrite * @throws IOException */ public static <S extends Shape> void repartitionLocal(Path in, Path out, S stockShape, long blockSize, CellInfo[] cells, String sindex, boolean overwrite) throws IOException { FileSystem inFs = in.getFileSystem(new Configuration()); FileSystem outFs = out.getFileSystem(new Configuration()); // Overwrite output file if (outFs.exists(out)) { if (overwrite) outFs.delete(out, true); else throw new RuntimeException( "Output file '" + out + "' already exists and overwrite flag is not set"); } outFs.mkdirs(out); ShapeRecordWriter<Shape> writer; boolean pack = sindex.equals("r+tree"); boolean expand = sindex.equals("rtree"); if (sindex.equals("grid")) { writer = new GridRecordWriter<Shape>(out, null, null, cells, pack, expand); } else if (sindex.equals("rtree") || sindex.equals("r+tree")) { writer = new RTreeGridRecordWriter<Shape>(out, null, null, cells, pack, expand); writer.setStockObject(stockShape); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } FileStatus inFileStatus = inFs.getFileStatus(in); // Copy blocksize from source file if it's globally indexed if (blockSize == 0) { GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, in); if (globalIndex != null) { blockSize = inFs.getFileStatus(new Path(in, globalIndex.iterator().next().filename)).getBlockSize(); } } if (blockSize != 0) ((GridRecordWriter<Shape>) writer).setBlockSize(blockSize); long length = inFileStatus.getLen(); FSDataInputStream datain = inFs.open(in); ShapeRecordReader<S> reader = new ShapeRecordReader<S>(datain, 0, length); Prism c = reader.createKey(); NullWritable dummy = NullWritable.get(); while (reader.next(c, stockShape)) { writer.write(dummy, stockShape); } writer.close(null); }
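The exists/delete/mkdirs sequence at the top of repartitionLocal is the overwrite guard that recurs throughout these examples. A minimal standalone sketch of just that guard; the class name, method name, and exception message are placeholders, not taken from the source above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputGuard {
    /** Remove an existing output path when overwrite is allowed, otherwise fail fast. */
    public static void prepareOutput(Path out, boolean overwrite, Configuration conf) throws IOException {
        FileSystem fs = out.getFileSystem(conf);
        if (fs.exists(out)) {
            if (!overwrite) {
                throw new IOException("Output " + out + " already exists and overwrite flag is not set");
            }
            fs.delete(out, true); // recursive=true removes the directory and everything under it
        }
        fs.mkdirs(out);
    }
}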
From source file:com.ricemap.spateDB.operations.Sampler.java
License:Apache License
/** * Sample a ratio of the file through a MapReduce job * @param fs * @param files * @param ratio * @param threshold - Maximum number of elements to be sampled * @param output * @param inObj * @return * @throws IOException */ public static <T extends TextSerializable, O extends TextSerializable> int sampleMapReduceWithRatio( FileSystem fs, Path[] files, double ratio, long threshold, long seed, final ResultCollector<O> output, T inObj, O outObj) throws IOException { JobConf job = new JobConf(FileMBR.class); Path outputPath; FileSystem outFs = FileSystem.get(job); do { outputPath = new Path(files[0].toUri().getPath() + ".sample_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); job.setJobName("Sample"); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); job.setClass(InClass, inObj.getClass(), TextSerializable.class); job.setClass(OutClass, outObj.getClass(), TextSerializable.class); job.setMapperClass(Map.class); job.setLong(RANDOM_SEED, seed); job.setFloat(SAMPLE_RATIO, (float) ratio); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(0); job.setInputFormat(ShapeLineInputFormat.class); job.setOutputFormat(TextOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, files); TextOutputFormat.setOutputPath(job, outputPath); // Submit the job RunningJob run_job = JobClient.runJob(job); Counters counters = run_job.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES); Sampler.sizeOfLastProcessedFile = inputBytesCounter.getValue(); // Ratio of records to return from output based on the threshold // Note that any number greater than or equal to one will cause all // elements to be returned final double selectRatio = (double) threshold / resultCount; // Read job result int result_size = 0; if (output != null) { Text line = new Text(); FileStatus[] results = outFs.listStatus(outputPath); for (FileStatus fileStatus : results) { if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) { LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath())); try { while (lineReader.readLine(line) > 0) { if (Math.random() < selectRatio) { if (output != null) { outObj.fromText(line); output.collect(outObj); } result_size++; } } } catch (RuntimeException e) { e.printStackTrace(); } lineReader.close(); } } } outFs.delete(outputPath, true); return result_size; }
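sampleMapReduceWithRatio deletes its temporary job output only after the read loop finishes; putting the recursive delete in a finally block makes the cleanup survive a failed read as well. A sketch under that assumption; readResults stands in for the part-file reading loop above:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TempOutputCleanup {
    public static void consumeAndClean(FileSystem fs, Path tmpOutput) throws IOException {
        try {
            readResults(fs, tmpOutput); // placeholder for iterating and reading the "part-" files
        } finally {
            fs.delete(tmpOutput, true); // always drop the temporary job output, even on failure
        }
    }

    private static void readResults(FileSystem fs, Path out) throws IOException {
        // ... list fs.listStatus(out), open each "part-" file and collect samples, as above ...
    }
}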
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape, String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite) throws IOException { JobConf job = new JobConf(RandomSpatialGenerator.class); job.setJobName("Generator"); FileSystem outFs = file.getFileSystem(job); // Overwrite output file if (outFs.exists(file)) { if (overwrite) outFs.delete(file, true); else throw new RuntimeException( "Output file '" + file + "' already exists and overwrite flag is not set"); } // Set generation parameters in job job.setLong(RandomShapeGenerator.GenerationSize, size); SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr); if (seed != 0) job.setLong(RandomShapeGenerator.GenerationSeed, seed); if (rectsize != 0) job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize); if (type != null) job.set(RandomShapeGenerator.GenerationType, type.toString()); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); // Set input format and map class job.setInputFormat(RandomInputFormat.class); job.setMapperClass(Repartition.RepartitionMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(shape.getClass()); job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks())); SpatialSite.setShapeClass(job, shape.getClass()); if (blocksize != 0) { job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize); } CellInfo[] cells; if (sindex == null) { cells = new CellInfo[] { new CellInfo(1, mbr) }; } else if (sindex.equals("grid")) { GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2); FileSystem fs = file.getFileSystem(job); if (blocksize == 0) { blocksize = fs.getDefaultBlockSize(file); } int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize); gridInfo.calculateCellDimensions(numOfCells); cells = gridInfo.getAllCells(); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } SpatialSite.setCells(job, cells); // Do not set a reduce function. Use the default identity reduce function if (cells.length == 1) { // All objects are in one partition. No need for a reduce phase job.setNumReduceTasks(0); } else { // More than one partition. Need a reduce phase to group shapes of the same partition together. job.setReducerClass(RepartitionReduce.class); job.setNumReduceTasks( Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10))); } // Set output path FileOutputFormat.setOutputPath(job, file); if (sindex == null || sindex.equals("grid")) { job.setOutputFormat(GridOutputFormat.class); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } JobClient.runJob(job); // Concatenate all master files into one file FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().contains("_master"); } }); String ext = resultFiles[0].getPath().getName() .substring(resultFiles[0].getPath().getName().lastIndexOf('.')); Path masterPath = new Path(file, "_master" + ext); OutputStream destOut = outFs.create(masterPath); byte[] buffer = new byte[4096]; for (FileStatus f : resultFiles) { InputStream in = outFs.open(f.getPath()); int bytes_read; do { bytes_read = in.read(buffer); if (bytes_read > 0) destOut.write(buffer, 0, bytes_read); } while (bytes_read > 0); in.close(); outFs.delete(f.getPath(), false); } destOut.close(); // Plot an image for the partitions used in file Path imagePath = new Path(file, "_partitions.png"); int imageSize = (int) (Math.sqrt(cells.length) * 300); Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false, false); }
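Note that the second delete call above passes recursive=false, because each _master part being removed is a plain file. A compact sketch of that concatenate-then-delete step; class and method names are illustrative:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ConcatAndRemove {
    public static void concat(FileSystem fs, FileStatus[] parts, Path merged) throws IOException {
        try (OutputStream out = fs.create(merged)) {
            for (FileStatus part : parts) {
                try (InputStream in = fs.open(part.getPath())) {
                    IOUtils.copyBytes(in, out, 4096, false); // false: keep 'out' open across parts
                }
                fs.delete(part.getPath(), false); // a single file, so a non-recursive delete is enough
            }
        }
    }
}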
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
/** * Generates random rectangles and writes the result to a file. * @param outFS - The file system that contains the output file * @param outputFile - The file name to write to. If either outFS or * outputFile is null, data is generated to the standard output * @param mbr - The whole MBR to generate in * @param shape * @param totalSize - The total size of the generated file * @param blocksize * @throws IOException */ public static void generateFileLocal(Path outFile, Shape shape, String sindex, long totalSize, Prism mbr, DistributionType type, int rectSize, long seed, long blocksize, boolean overwrite) throws IOException { FileSystem outFS = outFile.getFileSystem(new Configuration()); if (blocksize == 0) blocksize = outFS.getDefaultBlockSize(outFile); // Calculate the dimensions of each partition based on gindex type CellInfo[] cells; if (sindex == null) { cells = new CellInfo[] { new CellInfo(1, mbr) }; } else if (sindex.equals("grid")) { int num_partitions = Repartition.calculateNumberOfPartitions(new Configuration(), totalSize, outFS, outFile, blocksize); GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_partitions); cells = gridInfo.getAllCells(); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } // Overwrite output file if (outFS.exists(outFile)) { if (overwrite) outFS.delete(outFile, true); else throw new RuntimeException( "Output file '" + outFile + "' already exists and overwrite flag is not set"); } outFS.mkdirs(outFile); ShapeRecordWriter<Shape> writer; if (sindex == null || sindex.equals("grid")) { writer = new GridRecordWriter<Shape>(outFile, null, null, cells, false, false); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } if (rectSize == 0) rectSize = 100; long t1 = System.currentTimeMillis(); RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize, seed); Prism key = generator.createKey(); while (generator.next(key, shape)) { // Serialize it to text writer.write(NullWritable.get(), shape); } writer.close(null); long t2 = System.currentTimeMillis(); System.out.println("Generation time: " + (t2 - t1) + " millis"); }
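delete reports failure through its boolean return value rather than always throwing, which the examples above ignore. An overwrite guard that surfaces a silent failure could look like this sketch (not taken from the source):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckedOverwrite {
    public static void clearIfPresent(FileSystem fs, Path out) throws IOException {
        if (fs.exists(out) && !fs.delete(out, true)) {
            // delete can return false without throwing, e.g. when the path disappeared concurrently
            throw new IOException("Could not delete existing output " + out);
        }
    }
}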
From source file:com.rim.logdriver.admin.HFind.java
License:Apache License
@Override public int run(String[] args) throws Exception { final long startTime = System.currentTimeMillis(); int i = 0; while (i < args.length) { if (args[i].startsWith("-")) { break; } Path path = new Path(args[i]); FileSystem fs = path.getFileSystem(getConf()); FileStatus[] fileStatuses = fs.globStatus(path); if (fileStatuses != null) { for (FileStatus fileStatus : fileStatuses) { paths.add(fileStatus.getPath()); fileStatusCache.put(fileStatus.getPath(), fileStatus); } } i++; } while (i < args.length) { // -print action if ("-print".equals(args[i])) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } // -delete action if ("-delete".equals(args[i])) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { try { FileSystem fs = fileStatus.getPath().getFileSystem(getConf()); if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) { return fs.delete(fileStatus.getPath(), true); } } catch (IOException e) { e.printStackTrace(); } return false; } }); } // -atime test else if ("-atime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -atime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -mtime test else if ("-mtime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -mtime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -amin test else if ("-amin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -amin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -mmin test else if ("-mmin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -mmin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -regex test else if ("-regex".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -regex"); System.exit(1); } final Pattern p = Pattern.compile(args[i]); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if (p.matcher(fileStatus.getPath().toString()).matches()) { return true; } else { return false; } } }); } i++; } if (actions.size() == 0) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } search(); return 0; }
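The -delete action above removes a path only if it is a file or an empty directory. The same check can be written as a small helper; the helper name is made up, and isDirectory() is the non-deprecated form of the isDir() call used in the source:

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteIfLeaf {
    /** Delete files and empty directories; leave non-empty directories alone. */
    public static boolean deleteIfLeaf(FileSystem fs, FileStatus status) throws IOException {
        if (status.isDirectory() && fs.listStatus(status.getPath()).length > 0) {
            return false; // non-empty directory: skip it
        }
        return fs.delete(status.getPath(), true); // recursive flag is harmless, the directory is empty
    }
}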
From source file:com.rim.logdriver.admin.LogMaintenance.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); } // For some reason, Oozie needs some options to be set in system instead of // in the configuration. So copy the configs over. { Iterator<Entry<String, String>> i = conf.iterator(); while (i.hasNext()) { Entry<String, String> next = i.next(); System.setProperty(next.getKey(), next.getValue()); } } if (args.length < 3) { printUsage(); return 1; } String userName = args[0]; String dcNumber = args[1]; String service = args[2]; String date = null; String hour = null; if (args.length >= 4) { date = args[3]; } if (args.length >= 5) { hour = args[4]; } // Set from environment variables oozieUrl = getConfOrEnv(conf, "OOZIE_URL"); String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF"); String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF"); String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE"); String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE"); String maxConcurrentMergeJobs = getConfOrEnv(conf, "MAX_CONCURRENT_MERGE_JOBS"); String maxConcurrentFilterJobs = getConfOrEnv(conf, "MAX_CONCURRENT_FILTER_JOBS"); String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING"); String logdir = getConfOrEnv(conf, "logdriver.logdir.name"); boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs")); String rootDir = getConfOrEnv(conf, "service.root.dir"); boolean doMerge = true; boolean doArchive = true; boolean doDelete = true; if (oozieUrl == null) { LOG.info("OOZIE_URL is not set. Not merging or archiving."); doMerge = false; doArchive = false; } if (zkConnectString == null) { LOG.error("ZK_CONNECT_STRING is not set. Exiting."); return 1; } if (mergeJobPropertiesFile == null) { LOG.info("MERGEJOB_CONF is not set. Not merging."); doMerge = false; } if (filterJobPropertiesFile == null) { LOG.info("FILTERJOB_CONF is not set. Not archiving."); doArchive = false; } if (daysBeforeArchive == null) { LOG.info("DAYS_BEFORE_ARCHIVE is not set. Not archiving."); doArchive = false; } if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) { LOG.info("DAYS_BEFORE_ARCHIVE is negative. Not archiving."); doArchive = false; } if (daysBeforeDelete == null) { LOG.info("DAYS_BEFORE_DELETE is not set. Not deleting."); doDelete = false; } if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) { LOG.info("DAYS_BEFORE_DELETE is negative. Not deleting."); doDelete = false; } if (maxConcurrentMergeJobs == null) { LOG.info("MAX_CONCURRENT_MERGE_JOBS is not set. Using default value of -1."); maxConcurrentMergeJobs = "-1"; } if (maxConcurrentFilterJobs == null) { LOG.info("MAX_CONCURRENT_FILTER_JOBS is not set. Using default value of -1."); maxConcurrentFilterJobs = "-1"; } if (logdir == null) { LOG.info("LOGDRIVER_LOGDIR_NAME is not set. Using default value of 'logs'."); logdir = "logs"; } if (rootDir == null) { LOG.info("SERVICE_ROOT_DIR is not set. Using default value of 'service'."); rootDir = "/service"; } // Now it's safe to create our Oozie Runners.
OozieRunner mergeOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentMergeJobs)); Thread mergeOozieRunnerThread = new Thread(mergeOozieRunner); mergeOozieRunnerThread.setName("OozieRunner - Merge"); mergeOozieRunnerThread.setDaemon(false); mergeOozieRunnerThread.start(); OozieRunner filterOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentFilterJobs)); Thread filterOozieRunnerThread = new Thread(filterOozieRunner); filterOozieRunnerThread.setName("OozieRunner - Filter"); filterOozieRunnerThread.setDaemon(false); filterOozieRunnerThread.start(); // Figure out what date we start filters on. String filterCutoffDate = ""; if (doArchive) { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive)); filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR), (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY)); LOG.info("Archiving logs from before {}", filterCutoffDate); } String deleteCutoffDate = ""; if (doDelete) { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete)); deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR), (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY)); LOG.info("Deleting logs from before {}", deleteCutoffDate); } long now = System.currentTimeMillis(); // Various exceptions have been popping up here. So make sure I catch them // all. try { // We can hang if this fails. So make sure we abort if it fails. FileSystem fs = null; try { fs = FileSystem.get(conf); fs.exists(new Path("/")); // Test if it works. } catch (IOException e) { LOG.error("Error getting filesystem.", e); return 1; } // We'll need an Oozie client to check on orphaned directories. oozieClient = getOozieClient(); // LockUtils are used in a couple of places LockUtil lu = new LockUtil(zkConnectString); // Patterns to recognize hour, day and incoming directories, so that they // can be processed. 
Pattern datePathPattern; Pattern hourPathPattern; Pattern incomingPathPattern; Pattern dataPathPattern; Pattern archivePathPattern; Pattern workingPathPattern; if (hour != null) { datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")"); hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")"); incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/incoming"); dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/data"); archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/archive"); workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)"); } else if (date != null) { datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")"); hourPathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})"); incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/incoming"); dataPathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/data"); archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/archive"); workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)"); } else { datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})"); hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})"); incomingPathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming"); dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data"); archivePathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive"); workingPathPattern = Pattern .compile(rootDir + "/" + 
Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)"); } // Do a depth first search of the directory, processing anything that // looks // interesting along the way Deque<Path> paths = new ArrayDeque<Path>(); Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/"); paths.push(rootPath); while (paths.size() > 0) { Path p = paths.pop(); LOG.debug("{}", p.toString()); if (!fs.exists(p)) { continue; } FileStatus dirStatus = fs.getFileStatus(p); FileStatus[] children = fs.listStatus(p); boolean addChildren = true; boolean old = dirStatus.getModificationTime() < now - WAIT_TIME; LOG.debug(" Was last modified {}ms ago", now - dirStatus.getModificationTime()); if (!old) { LOG.debug(" Skipping, since it's not old enough."); } else if ((!rootPath.equals(p)) && (children.length == 0 || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) { // old and no children? Delete! LOG.info(" Deleting empty directory {}", p.toString()); fs.delete(p, true); } else { Matcher matcher = datePathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.debug("Checking date directory"); // If this is already done, then skip it. So only process if it // doesn't exist. if (fs.exists(new Path(p, READY_MARKER)) == false) { // Check each subdirectory. If they all have ready markers, then I // guess we're ready. boolean ready = true; for (FileStatus c : children) { if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) { ready = false; break; } } if (ready) { fs.createNewFile(new Path(p, READY_MARKER)); } } } matcher = hourPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.debug("Checking hour directory"); // If this is already done, then skip it. So only process if it // doesn't exist. if (fs.exists(new Path(p, READY_MARKER)) == false) { // Check each subdirectory. If they all have ready markers, then I // guess we're ready. boolean ready = true; for (FileStatus c : children) { if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) { ready = false; break; } } if (ready) { fs.createNewFile(new Path(p, READY_MARKER)); } } } // Check to see if we have to run a merge matcher = incomingPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.debug("Checking incoming directory"); String matchDate = matcher.group(1); String matchHour = matcher.group(2); String matchComponent = matcher.group(3); String timestamp = matchDate + matchHour; if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) { LOG.info("Deleting old directory: {}", p); fs.delete(p, true); addChildren = false; } else if (doMerge) { // old, looks right, and has children? Run it! 
boolean hasMatchingChildren = false; boolean subdirTooYoung = false; for (FileStatus child : children) { if (!hasMatchingChildren) { FileStatus[] grandchildren = fs.listStatus(child.getPath()); for (FileStatus gc : grandchildren) { if (VALID_FILE.matcher(gc.getPath().getName()).matches()) { hasMatchingChildren = true; break; } } } if (!subdirTooYoung) { if (child.getModificationTime() >= now - WAIT_TIME) { subdirTooYoung = true; LOG.debug(" Subdir {} is too young.", child.getPath()); } } } if (!hasMatchingChildren) { LOG.debug(" No files match the expected pattern ({})", VALID_FILE.pattern()); } if (hasMatchingChildren && !subdirTooYoung) { LOG.info(" Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); Properties oozieJobProps = new Properties(); oozieJobProps.load(new FileInputStream(mergeJobPropertiesFile)); oozieJobProps.setProperty("rootDir", rootDir); oozieJobProps.setProperty("dcNumber", dcNumber); oozieJobProps.setProperty("service", service); oozieJobProps.setProperty("date", matchDate); oozieJobProps.setProperty("hour", matchHour); oozieJobProps.setProperty("component", matchComponent); oozieJobProps.setProperty("user.name", userName); oozieJobProps.setProperty("logdir", logdir); mergeOozieRunner.submit(oozieJobProps); addChildren = false; } } } // Check to see if we need to run a filter and archive matcher = dataPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { String matchDate = matcher.group(1); String matchHour = matcher.group(2); String matchComponent = matcher.group(3); String timestamp = matchDate + matchHour; if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) { LOG.info("Deleting old directory: {}", p); fs.delete(p, true); addChildren = false; } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) { Properties oozieJobProps = new Properties(); oozieJobProps.load(new FileInputStream(filterJobPropertiesFile)); oozieJobProps.setProperty("rootDir", rootDir); oozieJobProps.setProperty("dcNumber", dcNumber); oozieJobProps.setProperty("service", service); oozieJobProps.setProperty("date", matchDate); oozieJobProps.setProperty("hour", matchHour); oozieJobProps.setProperty("component", matchComponent); oozieJobProps.setProperty("user.name", userName); oozieJobProps.setProperty("logdir", logdir); // Check to see if we should just keep all or delete all here. // The filter file should be here String appPath = oozieJobProps.getProperty("oozie.wf.application.path"); appPath = appPath.replaceFirst("\\$\\{.*?\\}", ""); Path filterFile = new Path(appPath + "/" + service + ".yaml"); LOG.info("Filter file is {}", filterFile); if (fs.exists(filterFile)) { List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent, fs.open(filterFile)); if (filters == null) { LOG.warn( " Got null when getting filters. Not processing. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); } else if (filters.size() == 0) { LOG.warn(" Got no filters. Not processing. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); } else if (filters.size() == 1 && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) { LOG.info(" Keeping everything. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); // Move files from data to archive // delete it all! 
String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent + "/archive/"; String[] moveArgs = { zkConnectString, dcNumber, service, matchDate, matchHour, matchComponent, "move " + p.toUri().getPath() + " " + destination }; ToolRunner.run(new Configuration(), new LockedFs(), moveArgs); } else if (filters.size() == 1 && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) { LOG.info(" Dropping everything. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); // delete it all! String[] delArgs = { zkConnectString, dcNumber, service, matchDate, matchHour, matchComponent, "delete " + p.toUri().getPath() }; ToolRunner.run(new Configuration(), new LockedFs(), delArgs); } else { LOG.info(" Run Filter/Archive job {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); filterOozieRunner.submit(oozieJobProps); } } else { LOG.warn("Skipping filter job, since no filter file exists"); } addChildren = false; } } matcher = archivePathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { String matchDate = matcher.group(1); String matchHour = matcher.group(2); String timestamp = matchDate + matchHour; if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) { LOG.info("Deleting old directory: {}", p); fs.delete(p, true); addChildren = false; } } matcher = workingPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.info(" Matches working pattern"); if (resetOrphanedJobs) { String matchDate = matcher.group(1); String matchHour = matcher.group(2); String matchComponent = matcher.group(3); String matchOozieJobId = matcher.group(4); // Check to see what's up with the oozie job. If it's still // running, // we don't want to touch it. Status status = null; try { WorkflowJob jobInfo = oozieClient.getJobInfo(matchOozieJobId); status = jobInfo.getStatus(); } catch (OozieClientException e) { if (e.getMessage() != null && e.getMessage().contains("Job does not exist")) { LOG.info("Oozie job not found. Proceeding as though job was failed.", e); status = Status.FAILED; } else { LOG.error("Oozie client error. 
Not Proceeding.", e); } } LOG.info(" Oozie job status is {}", status); if (status != null && status != Status.RUNNING && status != Status.PREP && status != Status.SUSPENDED) { // Move everything from working/xxx/incoming/ to incoming/ PathInfo lockPathInfo = new PathInfo(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent); lu.acquireWriteLock(lu.getLockPath(lockPathInfo)); FileStatus[] fileStatuses = fs .listStatus(new Path(p.toUri().getPath() + "/incoming/")); if (fileStatuses != null) { for (FileStatus fileStatus : fileStatuses) { Path toPath = new Path(fileStatus.getPath().getParent().getParent() .getParent().getParent(), "incoming/" + fileStatus.getPath().getName()); LOG.info(" Moving data from {} to {}", fileStatus.getPath(), toPath); LOG.info(" mkdir {}", toPath); fs.mkdirs(toPath); Path fromDir = new Path(p.toUri().getPath(), "incoming/" + fileStatus.getPath().getName()); LOG.info(" moving from {}", fromDir); FileStatus[] files = fs.listStatus(fromDir); if (files == null || files.length == 0) { LOG.info(" Nothing to move from {}", fromDir); } else { for (FileStatus f : files) { LOG.info(" rename {} {}", f.getPath(), new Path(toPath, f.getPath().getName())); fs.rename(f.getPath(), new Path(toPath, f.getPath().getName())); } } LOG.info(" rm {}", fileStatus.getPath().getParent().getParent()); fs.delete(fileStatus.getPath().getParent().getParent(), true); } lu.releaseWriteLock(lu.getLockPath(lockPathInfo)); } } } addChildren = false; } } // Add any children which are directories to the stack. if (addChildren) { for (int i = children.length - 1; i >= 0; i--) { FileStatus child = children[i]; if (child.isDir()) { paths.push(child.getPath()); } } } } // Since we may have deleted a bunch of directories, delete any unused // locks // from ZooKeeper. { LOG.info("Checking for unused locks in ZooKeeper"); String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir; if (date != null) { scanPath += "/" + date; if (hour != null) { scanPath += "/" + hour; } } List<LockInfo> lockInfo = lu.scan(scanPath); for (LockInfo li : lockInfo) { // Check if the lock path still exists in HDFS. If it doesn't, then // delete it from ZooKeeper. String path = li.getPath(); String hdfsPath = path.substring(LockUtil.ROOT.length()); if (!fs.exists(new Path(hdfsPath))) { ZooKeeper zk = lu.getZkClient(); while (!path.equals(LockUtil.ROOT)) { try { zk.delete(path, -1); } catch (KeeperException.NotEmptyException e) { // That's fine. just stop trying then. break; } catch (Exception e) { LOG.error("Caught exception trying to delete from ZooKeeper.", e); break; } LOG.info("Deleted from ZooKeeper: {}", path); path = path.substring(0, path.lastIndexOf('/')); } } } } lu.close(); // Now that we're done, wait for the Oozie Runner to stop, and print the // results. 
LOG.info("Waiting for Oozie jobs to complete."); mergeOozieRunner.shutdown(); mergeOozieRunnerThread.join(); LOG.info("Oozie Job Stats : Merge : Started={} Succeeded={} failed={} errors={}", new Object[] { mergeOozieRunner.getStarted(), mergeOozieRunner.getSucceeded(), mergeOozieRunner.getFailed(), mergeOozieRunner.getErrors() }); filterOozieRunner.shutdown(); filterOozieRunnerThread.join(); LOG.info("Oozie Job Stats : Filter : Started={} Succeeded={} failed={} errors={}", new Object[] { filterOozieRunner.getStarted(), filterOozieRunner.getSucceeded(), filterOozieRunner.getFailed(), filterOozieRunner.getErrors() }); } catch (Exception e) { LOG.error("Unexpected exception caught.", e); return 1; } return 0; }
From source file:com.rockstor.compact.Compactor.java
License:Apache License
public void compactData(String taskIdName) throws IOException, NoSuchAlgorithmException { Path dstDir = new Path(pathUtil.getSpecTaskDir(taskIdName)); FileSystem dfs = RockAccessor.getFileSystem(); if (!dfs.exists(dstDir)) { LOG.error("[COMPACTOR]: Directory " + dstDir + " does not exist"); return; } String metaFileName = pathUtil.getTaskMetaPath(taskIdName); if (!dfs.exists(new Path(metaFileName))) { LOG.error("[COMPACTOR]: meta file " + metaFileName + " does not exist"); return; } // compact data // 1. create rock data file String rockIdStr = null; // 2. create rock index file // 3. load meta file TaskMetaReader rocksMeta = new TaskMetaReader(); rocksMeta.open(metaFileName); Map<String, byte[]> rocks = rocksMeta.getRocks(); rocksMeta.close(); // 4. compact rock files one by one /* * for(rock:rocks){ load rock gb from db; load rock gb from delete file * sort gb by offset copy chunks to new data file, and write new index, * if offset is in gb set, drop and continue } */ Map<Long, Long> gbIndexes = null; RockIndexReader rockIndexReader = null; RockReader rockReader = null; Chunk chunk = null; // create rock writer RockCompactWriter rockWriter = new RockCompactWriter(); rockWriter.create(taskIdName); rockIdStr = rockWriter.getRockID(); String dataFileName = pathUtil.getTaskDataPath(taskIdName, rockIdStr); String gbIndexPath = null; long pos = 0; Long size = null; for (Entry<String, byte[]> entry : rocks.entrySet()) { LOG.info("compacting rock :" + entry.getKey()); } for (Entry<String, byte[]> entry : rocks.entrySet()) { gbIndexes = RockDB.getGarbages(entry.getValue()); rockIndexReader = new RockIndexReader(); LOG.debug("get " + gbIndexes.size() + " invalid chunks of rock " + entry.getKey() + " from chunk DB"); gbIndexPath = pathUtil.getGbMetaPath(entry.getKey()); if (dfs.exists(new Path(gbIndexPath))) { rockIndexReader.open(gbIndexPath); // merge gb data index while (rockIndexReader.hasNext()) { chunk = rockIndexReader.next(); LOG.debug("ignore list append chunk: " + chunk); gbIndexes.put(chunk.getOffset(), chunk.getSize() + Chunk.HEADER_LEN); } rockIndexReader.close(); } // copy chunks and write new index rockReader = RockReaderPool.getInstance().get(entry.getKey()); FSDataInputStream input = rockReader.getFSDataInputStream(); int padding_bytes = 0; while (rockReader.hasNext()) { pos = rockReader.getPos(); padding_bytes = (int) (pos & 7); if (padding_bytes != 0) { pos = pos + 8 - padding_bytes; } // LOG.info("pos now: "+pos); size = gbIndexes.get(pos); // ignore deleted chunk if (size != null) { LOG.debug("ignore chunk at " + pos + ", size: " + size); rockReader.seekg(pos + size); continue; } chunk = rockReader.nextChunk(); if (chunk == null) { LOG.error("[Compactor] read source chunk from " + entry.getKey() + ":" + pos + " Failed"); throw new IOException( "[Compactor] read source chunk from " + entry.getKey() + ":" + pos + " Failed"); } rockWriter.addChunk(chunk, input); } } rockWriter.close(); // 5. rename ${compactorDir}/rockId.dat ==> $(rock_data_dir)/rockId dfs.rename(new Path(dataFileName), new Path(Rock.HADOOP_DATA_HOME + "/" + rockIdStr)); // 6. remove invalid chunks removeInvalidChunks(taskIdName); // 7. sync left chunks syncLeftChunks(taskIdName); // 8. remove task dir dfs.delete(dstDir, true); }
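compactData promotes the rewritten rock file with rename and only then deletes the task directory; the ordering matters, since deleting first would lose the data if the process died in between. A sketch of that promote-then-clean step with placeholder names:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PromoteThenClean {
    public static void promote(FileSystem fs, Path taskDir, Path tmpData, Path finalData) throws IOException {
        // 1. publish the result first
        if (!fs.rename(tmpData, finalData)) {
            throw new IOException("rename " + tmpData + " -> " + finalData + " failed");
        }
        // 2. only now is it safe to throw away the whole task working directory
        fs.delete(taskDir, true);
    }
}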
From source file:com.rockstor.tools.RockStorFsFormat.java
License:Apache License
protected void cleanDfs() throws IOException { RockAccessor.connectHDFS(); String rootDir = conf.get("rockstor.rootdir"); LOG.info("connect to hdfs ok!"); FileSystem dfs = RockAccessor.getFileSystem(); dfs.delete(new Path(rootDir), true); LOG.info("remove rockstor root dir " + rootDir + " OK!"); RockAccessor.disconnectHDFS(); LOG.info("disconnect from hdfs ok!"); }
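cleanDfs simply deletes the whole rockstor root. On most FileSystem implementations, delete on a missing path returns false rather than throwing, so a reset like this is effectively idempotent; a sketch with illustrative names:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ResetRoot {
    public static void reset(Configuration conf, String rootDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        boolean removed = fs.delete(new Path(rootDir), true);
        // removed == false typically just means the directory was not there to begin with
        System.out.println(removed ? "removed " + rootDir : rootDir + " did not exist");
    }
}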
From source file:com.savy3.nonequijoin.MapOutputSampler.java
License:Apache License
/** * Driver for InputSampler MapReduce Job */ public static void runMap(Job job, Path sampleInputPath) throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException { LOG.info("Running a MapReduce Job on Sample Input File" + sampleInputPath.toString()); Configuration conf = new Configuration(); conf.setBoolean("mapreduce.job.ubertask.enable", true); conf.set("numSamples", "" + (job.getNumReduceTasks() - 1)); Job sampleJob = new Job(conf); sampleJob.setMapperClass(job.getMapperClass()); sampleJob.setReducerClass(SampleKeyReducer.class); sampleJob.setJarByClass(job.getMapperClass()); sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass()); sampleJob.setMapOutputValueClass(job.getMapOutputValueClass()); sampleJob.setOutputKeyClass(job.getMapOutputKeyClass()); sampleJob.setOutputValueClass(NullWritable.class); sampleJob.setInputFormatClass(SequenceFileInputFormat.class); sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath); FileSystem fs = FileSystem.get(conf); Path out = new Path(sampleInputPath.getParent(), "mapOut"); fs.delete(out, true); SequenceFileOutputFormat.setOutputPath(sampleJob, out); sampleJob.waitForCompletion(true); LOG.info("Sample MapReduce Job Output File" + out.toString()); Path partFile = new Path(out, "part-r-00000"); Path tmpFile = new Path("/_tmp"); fs.delete(tmpFile, true); fs.rename(partFile, tmpFile); fs.delete(sampleInputPath.getParent(), true); fs.rename(new Path("/_tmp"), sampleInputPath.getParent()); LOG.info("Sample partitioning file copied to location " + sampleInputPath.getParent().toString()); }
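runMap replaces the job's input directory with its own output through a delete/rename shuffle via a temporary path. The essence of that swap, with placeholder paths (the real code hard-codes /_tmp):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SwapViaTmp {
    /** Replace 'target' with 'replacement', staging through 'tmp' (assumed otherwise unused). */
    public static void swap(FileSystem fs, Path target, Path replacement, Path tmp) throws IOException {
        fs.delete(tmp, true);        // make sure the staging path is free
        fs.rename(replacement, tmp); // park the new data
        fs.delete(target, true);     // drop the old tree
        fs.rename(tmp, target);      // move the new data into place
    }
}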
From source file:com.scaleunlimited.cascading.hadoop.HadoopUtils.java
License:Apache License
public static void safeRemove(FileSystem fs, Path path) { if ((fs != null) && (path != null)) { try { fs.delete(path, true); } catch (Throwable t) { // Ignore } } }