Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file. If f is a directory and recursive is true, the directory and its contents are deleted; if recursive is false, deleting a non-empty directory fails. Returns true if the delete succeeded, false otherwise.
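
A minimal sketch of the call (deleteIfExists is a hypothetical helper, not part of the API):

public static boolean deleteIfExists(FileSystem fs, Path target) throws IOException {
    // recursive=true: if target is a directory, remove it and everything under it
    return fs.exists(target) && fs.delete(target, true);
}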

Usage

From source file:com.ricemap.spateDB.operations.Repartition.java

License:Apache License

/**
 * Repartitions a file on local machine without MapReduce jobs.
 * @param inFs
 * @param in
 * @param outFs
 * @param out
 * @param cells
 * @param stockShape
 * @param rtree
 * @param overwrite
 * @throws IOException 
 */
public static <S extends Shape> void repartitionLocal(Path in, Path out, S stockShape, long blockSize,
        CellInfo[] cells, String sindex, boolean overwrite) throws IOException {
    FileSystem inFs = in.getFileSystem(new Configuration());
    FileSystem outFs = out.getFileSystem(new Configuration());
    // Overwrite output file
    if (outFs.exists(out)) {
        if (overwrite)
            outFs.delete(out, true);
        else
            throw new RuntimeException(
                    "Output file '" + out + "' already exists and overwrite flag is not set");
    }
    outFs.mkdirs(out);

    ShapeRecordWriter<Shape> writer;
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    if (sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(out, null, null, cells, pack, expand);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        writer = new RTreeGridRecordWriter<Shape>(out, null, null, cells, pack, expand);
        writer.setStockObject(stockShape);
    } else {
        throw new RuntimeException("Unupoorted spatial idnex: " + sindex);
    }

    FileStatus inFileStatus = inFs.getFileStatus(in);
    // Copy blocksize from source file if it's globally indexed
    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, in);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(in, globalIndex.iterator().next().filename)).getBlockSize();
        }
    }
    if (blockSize != 0)
        ((GridRecordWriter<Shape>) writer).setBlockSize(blockSize);

    long length = inFileStatus.getLen();
    FSDataInputStream datain = inFs.open(in);
    ShapeRecordReader<S> reader = new ShapeRecordReader<S>(datain, 0, length);
    Prism c = reader.createKey();

    NullWritable dummy = NullWritable.get();

    while (reader.next(c, stockShape)) {
        writer.write(dummy, stockShape);
    }
    writer.close(null);
}
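
The exists/overwrite/delete guard above recurs in several of the examples below. A stand-alone sketch of the idiom (prepareOutput is a hypothetical helper name, not from the source):

public static void prepareOutput(FileSystem fs, Path out, boolean overwrite) throws IOException {
    if (fs.exists(out)) {
        if (!overwrite)
            throw new IOException("Output '" + out + "' already exists and overwrite flag is not set");
        fs.delete(out, true); // recursively remove the stale output
    }
    fs.mkdirs(out);
}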

From source file:com.ricemap.spateDB.operations.Sampler.java

License:Apache License

/**
 * Sample a ratio of the file through a MapReduce job
 * @param fs
 * @param files
 * @param ratio
 * @param threshold - Maximum number of elements to be sampled
 * @param output
 * @param inObj
 * @return
 * @throws IOException
 */
public static <T extends TextSerializable, O extends TextSerializable> int sampleMapReduceWithRatio(
        FileSystem fs, Path[] files, double ratio, long threshold, long seed, final ResultCollector<O> output,
        T inObj, O outObj) throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(files[0].toUri().getPath() + ".sample_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("Sample");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setClass(InClass, inObj.getClass(), TextSerializable.class);
    job.setClass(OutClass, outObj.getClass(), TextSerializable.class);

    job.setMapperClass(Map.class);
    job.setLong(RANDOM_SEED, seed);
    job.setFloat(SAMPLE_RATIO, (float) ratio);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(0);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, files);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    RunningJob run_job = JobClient.runJob(job);

    Counters counters = run_job.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
    Sampler.sizeOfLastProcessedFile = inputBytesCounter.getValue();

    // Ratio of records to return from output based on the threshold
    // Note that any number greater than or equal to one will cause all
    // elements to be returned
    final double selectRatio = (double) threshold / resultCount;

    // Read job result
    int result_size = 0;
    if (output != null) {
        Text line = new Text();
        FileStatus[] results = outFs.listStatus(outputPath);

        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                try {
                    while (lineReader.readLine(line) > 0) {
                        if (Math.random() < selectRatio) {
                            outObj.fromText(line);
                            output.collect(outObj);
                            result_size++;
                        }
                    }
                } catch (RuntimeException e) {
                    e.printStackTrace();
                }
                lineReader.close();
            }
        }
    }

    outFs.delete(outputPath, true);

    return result_size;
}
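
Note that the temporary job output is deleted only after the read completes. A sketch of a variant (not the original code) that guarantees cleanup even when reading fails:

try {
    // ... read and sample the job output from outputPath, as above ...
} finally {
    outFs.delete(outputPath, true); // always remove the scratch output
}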

From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java

License:Apache License

public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
        String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
        throws IOException {
    JobConf job = new JobConf(RandomSpatialGenerator.class);

    job.setJobName("Generator");
    FileSystem outFs = file.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(file)) {
        if (overwrite)
            outFs.delete(file, true);
        else
            throw new RuntimeException(
                    "Output file '" + file + "' already exists and overwrite flag is not set");
    }

    // Set generation parameters in job
    job.setLong(RandomShapeGenerator.GenerationSize, size);
    SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
    if (seed != 0)
        job.setLong(RandomShapeGenerator.GenerationSeed, seed);
    if (rectsize != 0)
        job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
    if (type != null)
        job.set(RandomShapeGenerator.GenerationType, type.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    SpatialSite.setShapeClass(job, shape.getClass());

    if (blocksize != 0) {
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
    }

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        FileSystem fs = file.getFileSystem(job);
        if (blocksize == 0) {
            blocksize = fs.getDefaultBlockSize(file);
        }
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, file);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(file, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();

    // Plot an image for the partitions used in file
    Path imagePath = new Path(file, "_partitions.png");
    int imageSize = (int) (Math.sqrt(cells.length) * 300);
    Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false,
            false);
}
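
The concatenation loop above uses the non-recursive form, outFs.delete(f.getPath(), false), which removes a single file. As a minimal illustration (paths are placeholders), recursive=false is safe for plain files but typically fails with an IOException on a non-empty directory:

fs.delete(new Path("/data/part-00000"), false); // plain file: deleted
// fs.delete(new Path("/data"), false);         // non-empty directory: fails rather than recursing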

From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java

License:Apache License

/**
 * Generates random rectangles and write the result to a file.
 * @param outFS - The file system that contains the output file
 * @param outputFile - The file name to write to. If either outFS or
 *   outputFile is null, data is generated to the standard output
 * @param mbr - The whole MBR to generate in
 * @param shape
 * @param totalSize - The total size of the generated file
 * @param blocksize 
 * @throws IOException 
 */
public static void generateFileLocal(Path outFile, Shape shape, String sindex, long totalSize, Prism mbr,
        DistributionType type, int rectSize, long seed, long blocksize, boolean overwrite) throws IOException {
    FileSystem outFS = outFile.getFileSystem(new Configuration());
    if (blocksize == 0)
        blocksize = outFS.getDefaultBlockSize(outFile);

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        int num_partitions = Repartition.calculateNumberOfPartitions(new Configuration(), totalSize, outFS,
                outFile, blocksize);

        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    // Overwrite output file
    if (outFS.exists(outFile)) {
        if (overwrite)
            outFS.delete(outFile, true);
        else
            throw new RuntimeException(
                    "Output file '" + outFile + "' already exists and overwrite flag is not set");
    }
    outFS.mkdirs(outFile);

    ShapeRecordWriter<Shape> writer;
    if (sindex == null || sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(outFile, null, null, cells, false, false);
    } else {
        throw new RuntimeException("Unupoorted spatial idnex: " + sindex);
    }

    if (rectSize == 0)
        rectSize = 100;
    long t1 = System.currentTimeMillis();

    RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize,
            seed);

    Prism key = generator.createKey();

    while (generator.next(key, shape)) {
        // Serialize it to text
        writer.write(NullWritable.get(), shape);
    }
    writer.close(null);
    long t2 = System.currentTimeMillis();

    System.out.println("Generation time: " + (t2 - t1) + " millis");
}

From source file:com.rim.logdriver.admin.HFind.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();

    int i = 0;
    while (i < args.length) {
        if (args[i].startsWith("-")) {
            break;
        }

        Path path = new Path(args[i]);
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] fileStatuses = fs.globStatus(path);
        if (fileStatuses != null) {
            for (FileStatus fileStatus : fileStatuses) {
                paths.add(fileStatus.getPath());
                fileStatusCache.put(fileStatus.getPath(), fileStatus);
            }
        }

        i++;
    }

    while (i < args.length) {
        // -print action
        if ("-print".equals(args[i])) {
            actions.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    System.out.println(fileStatus.getPath());
                    return true;
                }
            });
        }

        // -delete action
        if ("-delete".equals(args[i])) {
            actions.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    try {
                        FileSystem fs = fileStatus.getPath().getFileSystem(getConf());
                        if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) {
                            return fs.delete(fileStatus.getPath(), true);
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    return false;
                }
            });
        }

        // -atime test
        else if ("-atime".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing arguement for -atime");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) > time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) < time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) == time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            }
        }

        // -mtime test
        else if ("-mtime".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing arguement for -mtime");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) > time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) < time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) == time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            }
        }

        // -amin test
        else if ("-amin".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing arguement for -amin");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) > time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) < time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) == time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            }
        }

        // -mmin test
        else if ("-mmin".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing arguement for -mmin");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) > time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) < time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) == time) {
                            return true;
                        } else {
                            return false;
                        }
                    }
                });
            }
        }

        // -regex test
        else if ("-regex".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing arguement for -regex");
                System.exit(1);
            }

            final Pattern p = Pattern.compile(args[i]);
            tests.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    if (p.matcher(fileStatus.getPath().toString()).matches()) {
                        return true;
                    } else {
                        return false;
                    }
                }
            });
        }

        i++;
    }

    if (actions.size() == 0) {
        actions.add(new FileStatusFilter() {
            @Override
            public boolean accept(FileStatus fileStatus) {
                System.out.println(fileStatus.getPath());
                return true;
            }
        });
    }

    search();

    return 0;
}
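
Assuming HFind is wired up as a standard Hadoop Tool (only run() is shown here, so this is an assumption), a driver that deletes matching entries older than 30 days might look like:

int rc = ToolRunner.run(new Configuration(), new HFind(),
        new String[] { "/logs/*", "-mtime", "+30", "-delete" });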

From source file:com.rim.logdriver.admin.LogMaintenance.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    // For some reason, Oozie needs some options to be set as system properties
    // instead of in the configuration. So copy the configs over.
    {
        Iterator<Entry<String, String>> i = conf.iterator();
        while (i.hasNext()) {
            Entry<String, String> next = i.next();
            System.setProperty(next.getKey(), next.getValue());
        }
    }

    if (args.length < 3) {
        printUsage();
        return 1;
    }

    String userName = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = null;
    String hour = null;
    if (args.length >= 4) {
        date = args[3];
    }
    if (args.length >= 5) {
        hour = args[4];
    }

    // Set from environment variables
    oozieUrl = getConfOrEnv(conf, "OOZIE_URL");
    String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF");
    String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF");
    String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE");
    String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE");
    String maxConcurrentMergeJobs = getConfOrEnv(conf, "MAX_CONCURRENT_MERGE_JOBS");
    String maxConcurrentFilterJobs = getConfOrEnv(conf, "MAX_CONCURRENT_FILTER_JOBS");
    String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING");
    String logdir = getConfOrEnv(conf, "logdriver.logdir.name");
    boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs"));
    String rootDir = getConfOrEnv(conf, "service.root.dir");

    boolean doMerge = true;
    boolean doArchive = true;
    boolean doDelete = true;

    if (oozieUrl == null) {
        LOG.info("OOZIE_URL is not set.  Not merging or archiving.");
        doMerge = false;
        doArchive = false;
    }
    if (zkConnectString == null) {
        LOG.error("ZK_CONNECT_STRING is not set.  Exiting.");
        return 1;
    }
    if (mergeJobPropertiesFile == null) {
        LOG.info("MERGEJOB_CONF is not set.  Not merging.");
        doMerge = false;
    }
    if (filterJobPropertiesFile == null) {
        LOG.info("FILTERJOB_CONF is not set.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeArchive == null) {
        LOG.info("DAYS_BEFORE_ARCHIVE is not set.  Not archiving.");
        doArchive = false;
    }
    if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) {
        LOG.info("DAYS_BEFORE_ARCHIVE is negative.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeDelete == null) {
        LOG.info("DAYS_BEFORE_DELETE is not set.  Not deleting.");
        doDelete = false;
    }
    if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) {
        LOG.info("DAYS_BEFORE_DELETE is negative.  Not deleting.");
        doDelete = false;
    }
    if (maxConcurrentMergeJobs == null) {
        LOG.info("MAX_CONCURRENT_MERGE_JOBS is not set.  Using default value of -1.");
        maxConcurrentMergeJobs = "-1";
    }
    if (maxConcurrentFilterJobs == null) {
        LOG.info("MAX_CONCURRENT_FILTER_JOBS is not set.  Using default value of -1.");
        maxConcurrentFilterJobs = "-1";
    }
    if (logdir == null) {
        LOG.info("LOGDRIVER_LOGDIR_NAME is not set.  Using default value of 'logs'.");
        logdir = "logs";
    }
    if (rootDir == null) {
        LOG.info("SERVICE_ROOT_DIR is not set.  Using default value of 'service'.");
        rootDir = "/service";
    }

    // Now it's safe to create our Oozie Runners.
    OozieRunner mergeOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentMergeJobs));
    Thread mergeOozieRunnerThread = new Thread(mergeOozieRunner);
    mergeOozieRunnerThread.setName("OozieRunner - Merge");
    mergeOozieRunnerThread.setDaemon(false);
    mergeOozieRunnerThread.start();

    OozieRunner filterOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentFilterJobs));
    Thread filterOozieRunnerThread = new Thread(filterOozieRunner);
    filterOozieRunnerThread.setName("OozieRunner - Filter");
    filterOozieRunnerThread.setDaemon(false);
    filterOozieRunnerThread.start();

    // Figure out what date we start filters on.
    String filterCutoffDate = "";
    if (doArchive) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive));
        filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Archiving logs from before {}", filterCutoffDate);
    }
    String deleteCutoffDate = "";
    if (doDelete) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete));
        deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Deleting logs from before {}", deleteCutoffDate);
    }

    long now = System.currentTimeMillis();

    // Various exceptions have been popping up here. So make sure I catch them
    // all.
    try {
        // We can hang if this fails. So make sure we abort if it fails.
        FileSystem fs = null;
        try {
            fs = FileSystem.get(conf);
            fs.exists(new Path("/")); // Test if it works.
        } catch (IOException e) {
            LOG.error("Error getting filesystem.", e);
            return 1;
        }
        // We'll need an Oozie client to check on orphaned directories.
        oozieClient = getOozieClient();

        // LockUtils are used in a couple of places
        LockUtil lu = new LockUtil(zkConnectString);

        // Patterns to recognize hour, day and incoming directories, so that they
        // can be processed.
        Pattern datePathPattern;
        Pattern hourPathPattern;
        Pattern incomingPathPattern;
        Pattern dataPathPattern;
        Pattern archivePathPattern;
        Pattern workingPathPattern;
        if (hour != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)");
        } else if (date != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        } else {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})");
            incomingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        }

        // Do a depth first search of the directory, processing anything that
        // looks
        // interesting along the way
        Deque<Path> paths = new ArrayDeque<Path>();
        Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/");
        paths.push(rootPath);

        while (paths.size() > 0) {
            Path p = paths.pop();
            LOG.debug("{}", p.toString());

            if (!fs.exists(p)) {
                continue;
            }

            FileStatus dirStatus = fs.getFileStatus(p);
            FileStatus[] children = fs.listStatus(p);
            boolean addChildren = true;

            boolean old = dirStatus.getModificationTime() < now - WAIT_TIME;
            LOG.debug("    Was last modified {}ms ago", now - dirStatus.getModificationTime());

            if (!old) {
                LOG.debug("    Skipping, since it's not old enough.");

            } else if ((!rootPath.equals(p)) && (children.length == 0
                    || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) {
                // old and no children? Delete!
                LOG.info("    Deleting empty directory {}", p.toString());
                fs.delete(p, true);

            } else {
                Matcher matcher = datePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking date directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (!fs.exists(new Path(p, READY_MARKER))) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDir() && !fs.exists(new Path(c.getPath(), READY_MARKER))) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                matcher = hourPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking hour directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (!fs.exists(new Path(p, READY_MARKER))) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDir() && !fs.exists(new Path(c.getPath(), READY_MARKER))) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                // Check to see if we have to run a merge
                matcher = incomingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking incoming directory");
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doMerge) {

                        // old, looks right, and has children? Run it!
                        boolean hasMatchingChildren = false;
                        boolean subdirTooYoung = false;

                        for (FileStatus child : children) {
                            if (!hasMatchingChildren) {
                                FileStatus[] grandchildren = fs.listStatus(child.getPath());
                                for (FileStatus gc : grandchildren) {
                                    if (VALID_FILE.matcher(gc.getPath().getName()).matches()) {
                                        hasMatchingChildren = true;
                                        break;
                                    }
                                }
                            }
                            if (!subdirTooYoung) {
                                if (child.getModificationTime() >= now - WAIT_TIME) {
                                    subdirTooYoung = true;
                                    LOG.debug("    Subdir {} is too young.", child.getPath());
                                }
                            }
                        }

                        if (!hasMatchingChildren) {
                            LOG.debug("    No files match the expected pattern ({})", VALID_FILE.pattern());
                        }

                        if (hasMatchingChildren && !subdirTooYoung) {
                            LOG.info("    Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(),
                                    dcNumber, service, matchDate, matchHour, matchComponent });

                            Properties oozieJobProps = new Properties();
                            oozieJobProps.load(new FileInputStream(mergeJobPropertiesFile));

                            oozieJobProps.setProperty("rootDir", rootDir);
                            oozieJobProps.setProperty("dcNumber", dcNumber);
                            oozieJobProps.setProperty("service", service);
                            oozieJobProps.setProperty("date", matchDate);
                            oozieJobProps.setProperty("hour", matchHour);
                            oozieJobProps.setProperty("component", matchComponent);
                            oozieJobProps.setProperty("user.name", userName);
                            oozieJobProps.setProperty("logdir", logdir);

                            mergeOozieRunner.submit(oozieJobProps);

                            addChildren = false;
                        }
                    }
                }

                // Check to see if we need to run a filter and archive
                matcher = dataPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) {

                        Properties oozieJobProps = new Properties();
                        oozieJobProps.load(new FileInputStream(filterJobPropertiesFile));

                        oozieJobProps.setProperty("rootDir", rootDir);
                        oozieJobProps.setProperty("dcNumber", dcNumber);
                        oozieJobProps.setProperty("service", service);
                        oozieJobProps.setProperty("date", matchDate);
                        oozieJobProps.setProperty("hour", matchHour);
                        oozieJobProps.setProperty("component", matchComponent);
                        oozieJobProps.setProperty("user.name", userName);
                        oozieJobProps.setProperty("logdir", logdir);

                        // Check to see if we should just keep all or delete all here.
                        // The filter file should be here
                        String appPath = oozieJobProps.getProperty("oozie.wf.application.path");
                        appPath = appPath.replaceFirst("\\$\\{.*?\\}", "");
                        Path filterFile = new Path(appPath + "/" + service + ".yaml");
                        LOG.info("Filter file is {}", filterFile);
                        if (fs.exists(filterFile)) {
                            List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent,
                                    fs.open(filterFile));

                            if (filters == null) {
                                LOG.warn(
                                        "    Got null when getting filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 0) {
                                LOG.warn("    Got no filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) {
                                LOG.info("    Keeping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // Move files from data to archive
                                // delete it all!
                                String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir
                                        + "/" + matchDate + "/" + matchHour + "/" + matchComponent
                                        + "/archive/";

                                String[] moveArgs = { zkConnectString, dcNumber, service, matchDate, matchHour,
                                        matchComponent, "move " + p.toUri().getPath() + " " + destination };
                                ToolRunner.run(new Configuration(), new LockedFs(), moveArgs);
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) {
                                LOG.info("    Dropping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // delete it all!
                                String[] delArgs = { zkConnectString, dcNumber, service, matchDate, matchHour,
                                        matchComponent, "delete " + p.toUri().getPath() };
                                ToolRunner.run(new Configuration(), new LockedFs(), delArgs);
                            } else {
                                LOG.info("    Run Filter/Archive job {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                filterOozieRunner.submit(oozieJobProps);
                            }
                        } else {
                            LOG.warn("Skipping filter job, since no filter file exists");
                        }

                        addChildren = false;
                    }
                }

                matcher = archivePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    }
                }

                matcher = workingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.info("  Matches working pattern");
                    if (resetOrphanedJobs) {
                        String matchDate = matcher.group(1);
                        String matchHour = matcher.group(2);
                        String matchComponent = matcher.group(3);
                        String matchOozieJobId = matcher.group(4);

                        // Check to see what's up with the oozie job. If it's still
                        // running,
                        // we don't want to touch it.
                        Status status = null;
                        try {
                            WorkflowJob jobInfo = oozieClient.getJobInfo(matchOozieJobId);
                            status = jobInfo.getStatus();
                        } catch (OozieClientException e) {
                            if (e.getMessage() != null && e.getMessage().contains("Job does not exist")) {
                                LOG.info("Oozie job not found.  Proceeding as though job was failed.", e);
                                status = Status.FAILED;
                            } else {
                                LOG.error("Oozie client error.  Not Proceeding.", e);
                            }
                        }
                        LOG.info("  Oozie job status is {}", status);
                        if (status != null && status != Status.RUNNING && status != Status.PREP
                                && status != Status.SUSPENDED) {
                            // Move everything from working/xxx/incoming/ to incoming/
                            PathInfo lockPathInfo = new PathInfo(rootDir + "/" + dcNumber + "/" + service + "/"
                                    + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent);
                            lu.acquireWriteLock(lu.getLockPath(lockPathInfo));

                            FileStatus[] fileStatuses = fs
                                    .listStatus(new Path(p.toUri().getPath() + "/incoming/"));
                            if (fileStatuses != null) {
                                for (FileStatus fileStatus : fileStatuses) {
                                    Path toPath = new Path(fileStatus.getPath().getParent().getParent()
                                            .getParent().getParent(),
                                            "incoming/" + fileStatus.getPath().getName());

                                    LOG.info("  Moving data from {} to {}", fileStatus.getPath(), toPath);
                                    LOG.info("    mkdir {}", toPath);
                                    fs.mkdirs(toPath);

                                    Path fromDir = new Path(p.toUri().getPath(),
                                            "incoming/" + fileStatus.getPath().getName());
                                    LOG.info("    moving from {}", fromDir);
                                    FileStatus[] files = fs.listStatus(fromDir);
                                    if (files == null || files.length == 0) {
                                        LOG.info("    Nothing to move from  {}", fromDir);
                                    } else {
                                        for (FileStatus f : files) {
                                            LOG.info("    rename {} {}", f.getPath(),
                                                    new Path(toPath, f.getPath().getName()));
                                            fs.rename(f.getPath(), new Path(toPath, f.getPath().getName()));
                                        }
                                    }

                                    LOG.info("    rm {}", fileStatus.getPath().getParent().getParent());
                                    fs.delete(fileStatus.getPath().getParent().getParent(), true);
                                }

                                lu.releaseWriteLock(lu.getLockPath(lockPathInfo));

                            }
                        }
                    }

                    addChildren = false;
                }
            }

            // Add any children which are directories to the stack.
            if (addChildren) {
                for (int i = children.length - 1; i >= 0; i--) {
                    FileStatus child = children[i];
                    if (child.isDir()) {
                        paths.push(child.getPath());
                    }
                }
            }
        }

        // Since we may have deleted a bunch of directories, delete any unused
        // locks
        // from ZooKeeper.
        {
            LOG.info("Checking for unused locks in ZooKeeper");
            String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir;
            if (date != null) {
                scanPath += "/" + date;
                if (hour != null) {
                    scanPath += "/" + hour;
                }
            }

            List<LockInfo> lockInfo = lu.scan(scanPath);

            for (LockInfo li : lockInfo) {
                // Check if the lock path still exists in HDFS. If it doesn't, then
                // delete it from ZooKeeper.
                String path = li.getPath();
                String hdfsPath = path.substring(LockUtil.ROOT.length());
                if (!fs.exists(new Path(hdfsPath))) {
                    ZooKeeper zk = lu.getZkClient();

                    while (!path.equals(LockUtil.ROOT)) {
                        try {
                            zk.delete(path, -1);
                        } catch (KeeperException.NotEmptyException e) {
                            // That's fine. just stop trying then.
                            break;
                        } catch (Exception e) {
                            LOG.error("Caught exception trying to delete from ZooKeeper.", e);
                            break;
                        }
                        LOG.info("Deleted from ZooKeeper: {}", path);
                        path = path.substring(0, path.lastIndexOf('/'));
                    }

                }
            }
        }
        lu.close();

        // Now that we're done, wait for the Oozie Runner to stop, and print the
        // results.
        LOG.info("Waiting for Oozie jobs to complete.");
        mergeOozieRunner.shutdown();
        mergeOozieRunnerThread.join();
        LOG.info("Oozie Job Stats : Merge  : Started={} Succeeded={} failed={} errors={}",
                new Object[] { mergeOozieRunner.getStarted(), mergeOozieRunner.getSucceeded(),
                        mergeOozieRunner.getFailed(), mergeOozieRunner.getErrors() });

        filterOozieRunner.shutdown();
        filterOozieRunnerThread.join();
        LOG.info("Oozie Job Stats : Filter : Started={} Succeeded={} failed={} errors={}",
                new Object[] { filterOozieRunner.getStarted(), filterOozieRunner.getSucceeded(),
                        filterOozieRunner.getFailed(), filterOozieRunner.getErrors() });

    } catch (Exception e) {
        LOG.error("Unexpected exception caught.", e);
        return 1;
    }

    return 0;
}
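
The delete decisions above compare zero-padded yyyyMMddHH strings lexicographically, which orders them the same way as the timestamps they encode. A minimal illustration (the values are made up):

String deleteCutoffDate = "2024010100";                           // hypothetical cutoff
String timestamp = "2023123123";                                  // Dec 31, 2023, hour 23
boolean shouldDelete = timestamp.compareTo(deleteCutoffDate) < 0; // true: older than cutoff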

From source file:com.rockstor.compact.Compactor.java

License:Apache License

public void compactData(String taskIdName) throws IOException, NoSuchAlgorithmException {
    Path dstDir = new Path(pathUtil.getSpecTaskDir(taskIdName));
    FileSystem dfs = RockAccessor.getFileSystem();
    if (!dfs.exists(dstDir)) {
        LOG.error("[COMPACTOR]: Directory " + dstDir + " is not exist");
        return;/*from  w ww. j  av a2s .  com*/
    }

    String metaFileName = pathUtil.getTaskMetaPath(taskIdName);
    if (!dfs.exists(new Path(metaFileName))) {
        LOG.error("[COMPACTOR]: meta file " + metaFileName + " is not existed");
        return;
    }

    // compact data
    // 1. create rock data file
    String rockIdStr = null;

    // 2. create rock index file

    // 3. load meta file
    TaskMetaReader rocksMeta = new TaskMetaReader();
    rocksMeta.open(metaFileName);
    Map<String, byte[]> rocks = rocksMeta.getRocks();
    rocksMeta.close();
    // 4. compact rock files one by one
    /*
     * for (rock : rocks) {
     *     load the rock's garbage index from the DB and from the delete file;
     *     sort the garbage entries by offset;
     *     copy chunks to the new data file and write a new index,
     *     dropping any chunk whose offset is in the garbage set.
     * }
     */
    Map<Long, Long> gbIndexes = null;
    RockIndexReader rockIndexReader = null;
    RockReader rockReader = null;
    Chunk chunk = null;

    // create rock writer
    RockCompactWriter rockWriter = new RockCompactWriter();
    rockWriter.create(taskIdName);

    rockIdStr = rockWriter.getRockID();

    String dataFileName = pathUtil.getTaskDataPath(taskIdName, rockIdStr);

    String gbIndexPath = null;

    long pos = 0;
    Long size = null;

    for (Entry<String, byte[]> entry : rocks.entrySet()) {
        LOG.info("compacting rock :" + entry.getKey());
    }

    for (Entry<String, byte[]> entry : rocks.entrySet()) {
        gbIndexes = RockDB.getGarbages(entry.getValue());
        rockIndexReader = new RockIndexReader();
        LOG.debug("get " + gbIndexes.size() + " invalid chunks of rock " + entry.getKey() + " from chunk DB");
        gbIndexPath = pathUtil.getGbMetaPath(entry.getKey());
        if (dfs.exists(new Path(gbIndexPath))) {
            rockIndexReader.open(gbIndexPath);

            // merge gb data index
            while (rockIndexReader.hasNext()) {
                chunk = rockIndexReader.next();
                LOG.debug("ignore list append chunk: " + chunk);
                gbIndexes.put(chunk.getOffset(), chunk.getSize() + Chunk.HEADER_LEN);
            }

            rockIndexReader.close();
        }

        // copy chunks and write new index
        rockReader = RockReaderPool.getInstance().get(entry.getKey());
        FSDataInputStream input = rockReader.getFSDataInputStream();
        int padding_bytes = 0;
        while (rockReader.hasNext()) {
            pos = rockReader.getPos();
            padding_bytes = (int) (pos & 7);
            if (padding_bytes != 0) {
                pos = pos + 8 - padding_bytes;
            }
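            // note: an equivalent branch-free form of this 8-byte alignment is pos = (pos + 7) & ~7L;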

            // LOG.info("pos now: "+pos);

            size = gbIndexes.get(pos);

            // ignore deleted chunk
            if (size != null) {
                LOG.debug("ignore chunk at " + pos + ", size: " + size);

                rockReader.seekg(pos + size);
                continue;
            }

            chunk = rockReader.nextChunk();
            if (chunk == null) {
                LOG.error("[Compactor] read source chunk from " + entry.getKey() + ":" + pos + " Failed");
                throw new IOException(
                        "[Compactor] read source chunk from " + entry.getKey() + ":" + pos + " Failed");
            }

            rockWriter.addChunk(chunk, input);
        }
    }

    rockWriter.close();

    // 5. rename ${compactorDir}/rockId.dat ==> $(rock_data_dir)/rockId
    dfs.rename(new Path(dataFileName), new Path(Rock.HADOOP_DATA_HOME + "/" + rockIdStr));

    // 6. remove invalid chunks
    removeInvalidChunks(taskIdName);

    // 7. sync left chunks
    syncLeftChunks(taskIdName);

    // 8. remove task dir
    dfs.delete(dstDir, true);
}

From source file:com.rockstor.tools.RockStorFsFormat.java

License:Apache License

protected void cleanDfs() throws IOException {
    RockAccessor.connectHDFS();
    String rootDir = conf.get("rockstor.rootdir");
    LOG.info("connect to hdfs ok!");
    FileSystem dfs = RockAccessor.getFileSystem();
    dfs.delete(new Path(rootDir), true);
    LOG.info("remove rockstor root dir " + rootDir + " OK!");
    RockAccessor.disconnectHDFS();
    LOG.info("disconnect from hdfs ok!");
}

From source file:com.savy3.nonequijoin.MapOutputSampler.java

License:Apache License

/**
 * Driver for InputSampler MapReduce Job
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce Job on Sample Input File" + sampleInputPath.toString());

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);
    FileSystem fs = FileSystem.get(conf);

    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);

    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);

    LOG.info("Sample MapReduce Job Output File" + out.toString());

    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());

    LOG.info("Sample partitioning file cpied to location " + sampleInputPath.getParent().toString());
}
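
The delete/rename sequence above swaps the single partition file into the place of the original sample input directory. A condensed sketch of the pattern (names are placeholders; note the swap is not atomic):

fs.delete(tmp, true);       // clear any stale scratch path
fs.rename(partFile, tmp);   // stash the result outside the target
fs.delete(targetDir, true); // drop the old directory
fs.rename(tmp, targetDir);  // move the result into place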

From source file:com.scaleunlimited.cascading.hadoop.HadoopUtils.java

License:Apache License

public static void safeRemove(FileSystem fs, Path path) {
    if ((fs != null) && (path != null)) {
        try {
            fs.delete(path, true);
        } catch (Throwable t) {
            // Ignore
        }
    }
}
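
A typical use of safeRemove is best-effort cleanup in a finally block, where a failed delete must not mask the original error (runJob and scratch are assumed names, not from the source):

Path scratch = new Path("/tmp/job-scratch");
try {
    runJob(scratch);                     // hypothetical work that writes to scratch
} finally {
    HadoopUtils.safeRemove(fs, scratch); // swallows any delete failure
}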