List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
From source file:com.ricemap.spateDB.operations.Repartition.java
License:Apache License
/** * Repartitions a file on the local machine without MapReduce jobs. * @param inFs * @param in * @param outFs * @param out * @param cells * @param stockShape * @param rtree * @param overwrite * @throws IOException */ public static <S extends Shape> void repartitionLocal(Path in, Path out, S stockShape, long blockSize, CellInfo[] cells, String sindex, boolean overwrite) throws IOException { FileSystem inFs = in.getFileSystem(new Configuration()); FileSystem outFs = out.getFileSystem(new Configuration()); // Overwrite output file if (outFs.exists(out)) { if (overwrite) outFs.delete(out, true); else throw new RuntimeException( "Output file '" + out + "' already exists and overwrite flag is not set"); } outFs.mkdirs(out); ShapeRecordWriter<Shape> writer; boolean pack = sindex.equals("r+tree"); boolean expand = sindex.equals("rtree"); if (sindex.equals("grid")) { writer = new GridRecordWriter<Shape>(out, null, null, cells, pack, expand); } else if (sindex.equals("rtree") || sindex.equals("r+tree")) { writer = new RTreeGridRecordWriter<Shape>(out, null, null, cells, pack, expand); writer.setStockObject(stockShape); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } FileStatus inFileStatus = inFs.getFileStatus(in); // Copy blocksize from source file if it's globally indexed if (blockSize == 0) { GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, in); if (globalIndex != null) { blockSize = inFs.getFileStatus(new Path(in, globalIndex.iterator().next().filename)).getBlockSize(); } } if (blockSize != 0) ((GridRecordWriter<Shape>) writer).setBlockSize(blockSize); long length = inFileStatus.getLen(); FSDataInputStream datain = inFs.open(in); ShapeRecordReader<S> reader = new ShapeRecordReader<S>(datain, 0, length); Prism c = reader.createKey(); NullWritable dummy = NullWritable.get(); while (reader.next(c, stockShape)) { writer.write(dummy, stockShape); } writer.close(null); }
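The exists/delete/mkdirs sequence at the top of repartitionLocal is the overwrite guard that recurs throughout these examples. A minimal standalone sketch of just that guard; the class name, method name, and exception message are placeholders, not taken from the source above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputGuard {
    /** Remove an existing output path when overwrite is allowed, otherwise fail fast. */
    public static void prepareOutput(Path out, boolean overwrite, Configuration conf) throws IOException {
        FileSystem fs = out.getFileSystem(conf);
        if (fs.exists(out)) {
            if (!overwrite) {
                throw new IOException("Output " + out + " already exists and overwrite flag is not set");
            }
            fs.delete(out, true); // recursive=true removes the directory and everything under it
        }
        fs.mkdirs(out);
    }
}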
From source file:com.ricemap.spateDB.operations.Sampler.java
License:Apache License
/** * Sample a ratio of the file through a MapReduce job * @param fs * @param files * @param ratio * @param threshold - Maximum number of elements to be sampled * @param output * @param inObj * @return * @throws IOException */ public static <T extends TextSerializable, O extends TextSerializable> int sampleMapReduceWithRatio( FileSystem fs, Path[] files, double ratio, long threshold, long seed, final ResultCollector<O> output, T inObj, O outObj) throws IOException { JobConf job = new JobConf(FileMBR.class); Path outputPath; FileSystem outFs = FileSystem.get(job); do { outputPath = new Path(files[0].toUri().getPath() + ".sample_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); job.setJobName("Sample"); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); job.setClass(InClass, inObj.getClass(), TextSerializable.class); job.setClass(OutClass, outObj.getClass(), TextSerializable.class); job.setMapperClass(Map.class); job.setLong(RANDOM_SEED, seed); job.setFloat(SAMPLE_RATIO, (float) ratio); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(0); job.setInputFormat(ShapeLineInputFormat.class); job.setOutputFormat(TextOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, files); TextOutputFormat.setOutputPath(job, outputPath); // Submit the job RunningJob run_job = JobClient.runJob(job); Counters counters = run_job.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES); Sampler.sizeOfLastProcessedFile = inputBytesCounter.getValue(); // Ratio of records to return from output based on the threshold // Note that any number greater than or equal to one will cause all // elements to be returned final double selectRatio = (double) threshold / resultCount; // Read job result int result_size = 0; if (output != null) { Text line = new Text(); FileStatus[] results = outFs.listStatus(outputPath); for (FileStatus fileStatus : results) { if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) { LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath())); try { while (lineReader.readLine(line) > 0) { if (Math.random() < selectRatio) { if (output != null) { outObj.fromText(line); output.collect(outObj); } result_size++; } } } catch (RuntimeException e) { e.printStackTrace(); } lineReader.close(); } } } outFs.delete(outputPath, true); return result_size; }
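sampleMapReduceWithRatio deletes its temporary job output only after the read loop finishes; putting the recursive delete in a finally block makes the cleanup survive a failed read as well. A sketch under that assumption; readResults stands in for the part-file reading loop above:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TempOutputCleanup {
    public static void consumeAndClean(FileSystem fs, Path tmpOutput) throws IOException {
        try {
            readResults(fs, tmpOutput); // placeholder for iterating and reading the "part-" files
        } finally {
            fs.delete(tmpOutput, true); // always drop the temporary job output, even on failure
        }
    }

    private static void readResults(FileSystem fs, Path out) throws IOException {
        // ... list fs.listStatus(out), open each "part-" file and collect samples, as above ...
    }
}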
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape, String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite) throws IOException { JobConf job = new JobConf(RandomSpatialGenerator.class); job.setJobName("Generator"); FileSystem outFs = file.getFileSystem(job); // Overwrite output file if (outFs.exists(file)) { if (overwrite) outFs.delete(file, true); else throw new RuntimeException( "Output file '" + file + "' already exists and overwrite flag is not set"); } // Set generation parameters in job job.setLong(RandomShapeGenerator.GenerationSize, size); SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr); if (seed != 0) job.setLong(RandomShapeGenerator.GenerationSeed, seed); if (rectsize != 0) job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize); if (type != null) job.set(RandomShapeGenerator.GenerationType, type.toString()); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); // Set input format and map class job.setInputFormat(RandomInputFormat.class); job.setMapperClass(Repartition.RepartitionMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(shape.getClass()); job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks())); SpatialSite.setShapeClass(job, shape.getClass()); if (blocksize != 0) { job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize); } CellInfo[] cells; if (sindex == null) { cells = new CellInfo[] { new CellInfo(1, mbr) }; } else if (sindex.equals("grid")) { GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2); FileSystem fs = file.getFileSystem(job); if (blocksize == 0) { blocksize = fs.getDefaultBlockSize(file); } int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize); gridInfo.calculateCellDimensions(numOfCells); cells = gridInfo.getAllCells(); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } SpatialSite.setCells(job, cells); // Do not set a reduce function. Use the default identity reduce function if (cells.length == 1) { // All objects are in one partition. No need for a reduce phase job.setNumReduceTasks(0); } else { // More than one partition. Need a reduce phase to group shapes of the same partition together. job.setReducerClass(RepartitionReduce.class); job.setNumReduceTasks( Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10))); } // Set output path FileOutputFormat.setOutputPath(job, file); if (sindex == null || sindex.equals("grid")) { job.setOutputFormat(GridOutputFormat.class); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } JobClient.runJob(job); // Concatenate all master files into one file FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().contains("_master"); } }); String ext = resultFiles[0].getPath().getName() .substring(resultFiles[0].getPath().getName().lastIndexOf('.')); Path masterPath = new Path(file, "_master" + ext); OutputStream destOut = outFs.create(masterPath); byte[] buffer = new byte[4096]; for (FileStatus f : resultFiles) { InputStream in = outFs.open(f.getPath()); int bytes_read; do { bytes_read = in.read(buffer); if (bytes_read > 0) destOut.write(buffer, 0, bytes_read); } while (bytes_read > 0); in.close(); outFs.delete(f.getPath(), false); } destOut.close(); // Plot an image for the partitions used in file Path imagePath = new Path(file, "_partitions.png"); int imageSize = (int) (Math.sqrt(cells.length) * 300); Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false, false); }
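Note that the second delete call above passes recursive=false, because each _master part being removed is a plain file. A compact sketch of that concatenate-then-delete step; class and method names are illustrative:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ConcatAndRemove {
    public static void concat(FileSystem fs, FileStatus[] parts, Path merged) throws IOException {
        try (OutputStream out = fs.create(merged)) {
            for (FileStatus part : parts) {
                try (InputStream in = fs.open(part.getPath())) {
                    IOUtils.copyBytes(in, out, 4096, false); // false: keep 'out' open across parts
                }
                fs.delete(part.getPath(), false); // a single file, so a non-recursive delete is enough
            }
        }
    }
}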
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
/** * Generates random rectangles and writes the result to a file. * @param outFS - The file system that contains the output file * @param outputFile - The file name to write to. If either outFS or * outputFile is null, data is generated to the standard output * @param mbr - The whole MBR to generate in * @param shape * @param totalSize - The total size of the generated file * @param blocksize * @throws IOException */ public static void generateFileLocal(Path outFile, Shape shape, String sindex, long totalSize, Prism mbr, DistributionType type, int rectSize, long seed, long blocksize, boolean overwrite) throws IOException { FileSystem outFS = outFile.getFileSystem(new Configuration()); if (blocksize == 0) blocksize = outFS.getDefaultBlockSize(outFile); // Calculate the dimensions of each partition based on gindex type CellInfo[] cells; if (sindex == null) { cells = new CellInfo[] { new CellInfo(1, mbr) }; } else if (sindex.equals("grid")) { int num_partitions = Repartition.calculateNumberOfPartitions(new Configuration(), totalSize, outFS, outFile, blocksize); GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_partitions); cells = gridInfo.getAllCells(); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } // Overwrite output file if (outFS.exists(outFile)) { if (overwrite) outFS.delete(outFile, true); else throw new RuntimeException( "Output file '" + outFile + "' already exists and overwrite flag is not set"); } outFS.mkdirs(outFile); ShapeRecordWriter<Shape> writer; if (sindex == null || sindex.equals("grid")) { writer = new GridRecordWriter<Shape>(outFile, null, null, cells, false, false); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } if (rectSize == 0) rectSize = 100; long t1 = System.currentTimeMillis(); RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize, seed); Prism key = generator.createKey(); while (generator.next(key, shape)) { // Serialize it to text writer.write(NullWritable.get(), shape); } writer.close(null); long t2 = System.currentTimeMillis(); System.out.println("Generation time: " + (t2 - t1) + " millis"); }
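delete reports failure through its boolean return value rather than always throwing, which the examples above ignore. An overwrite guard that surfaces a silent failure could look like this sketch (not taken from the source):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckedOverwrite {
    public static void clearIfPresent(FileSystem fs, Path out) throws IOException {
        if (fs.exists(out) && !fs.delete(out, true)) {
            // delete can return false without throwing, e.g. when the path disappeared concurrently
            throw new IOException("Could not delete existing output " + out);
        }
    }
}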
From source file:com.rim.logdriver.admin.HFind.java
License:Apache License
@Override public int run(String[] args) throws Exception { final long startTime = System.currentTimeMillis(); int i = 0; while (i < args.length) { if (args[i].startsWith("-")) { break; } Path path = new Path(args[i]); FileSystem fs = path.getFileSystem(getConf()); FileStatus[] fileStatuses = fs.globStatus(path); if (fileStatuses != null) { for (FileStatus fileStatus : fileStatuses) { paths.add(fileStatus.getPath()); fileStatusCache.put(fileStatus.getPath(), fileStatus); } } i++; } while (i < args.length) { // -print action if ("-print".equals(args[i])) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } // -delete action if ("-delete".equals(args[i])) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { try { FileSystem fs = fileStatus.getPath().getFileSystem(getConf()); if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) { return fs.delete(fileStatus.getPath(), true); } } catch (IOException e) { e.printStackTrace(); } return false; } }); } // -atime test else if ("-atime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -atime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -mtime test else if ("-mtime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -mtime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -amin test else if ("-amin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -amin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -mmin test else if ("-mmin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -mmin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -regex test else if ("-regex".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing argument for -regex"); System.exit(1); } final Pattern p = Pattern.compile(args[i]); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if (p.matcher(fileStatus.getPath().toString()).matches()) { return true; } else { return false; } } }); } i++; } if (actions.size() == 0) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } search(); return 0; }
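The -delete action above removes a path only if it is a file or an empty directory. The same check can be written as a small helper; the helper name is made up, and isDirectory() is the non-deprecated form of the isDir() call used in the source:

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteIfLeaf {
    /** Delete files and empty directories; leave non-empty directories alone. */
    public static boolean deleteIfLeaf(FileSystem fs, FileStatus status) throws IOException {
        if (status.isDirectory() && fs.listStatus(status.getPath()).length > 0) {
            return false; // non-empty directory: skip it
        }
        return fs.delete(status.getPath(), true); // recursive flag is harmless, the directory is empty
    }
}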
From source file:com.rim.logdriver.admin.LogMaintenance.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); } // For some reason, Oozie needs some options to be set in system instead of // in the configuration. So copy the configs over. { Iterator<Entry<String, String>> i = conf.iterator(); while (i.hasNext()) { Entry<String, String> next = i.next(); System.setProperty(next.getKey(), next.getValue()); } } if (args.length < 3) { printUsage(); return 1; } String userName = args[0]; String dcNumber = args[1]; String service = args[2]; String date = null; String hour = null; if (args.length >= 4) { date = args[3]; } if (args.length >= 5) { hour = args[4]; } // Set from environment variables oozieUrl = getConfOrEnv(conf, "OOZIE_URL"); String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF"); String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF"); String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE"); String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE"); String maxConcurrentMergeJobs = getConfOrEnv(conf, "MAX_CONCURRENT_MERGE_JOBS"); String maxConcurrentFilterJobs = getConfOrEnv(conf, "MAX_CONCURRENT_FILTER_JOBS"); String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING"); String logdir = getConfOrEnv(conf, "logdriver.logdir.name"); boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs")); String rootDir = getConfOrEnv(conf, "service.root.dir"); boolean doMerge = true; boolean doArchive = true; boolean doDelete = true; if (oozieUrl == null) { LOG.info("OOZIE_URL is not set. Not merging or archiving."); doMerge = false; doArchive = false; } if (zkConnectString == null) { LOG.error("ZK_CONNECT_STRING is not set. Exiting."); return 1; } if (mergeJobPropertiesFile == null) { LOG.info("MERGEJOB_CONF is not set. Not merging."); doMerge = false; } if (filterJobPropertiesFile == null) { LOG.info("FILTERJOB_CONF is not set. Not archiving."); doArchive = false; } if (daysBeforeArchive == null) { LOG.info("DAYS_BEFORE_ARCHIVE is not set. Not archiving."); doArchive = false; } if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) { LOG.info("DAYS_BEFORE_ARCHIVE is negative. Not archiving."); doArchive = false; } if (daysBeforeDelete == null) { LOG.info("DAYS_BEFORE_DELETE is not set. Not deleting."); doDelete = false; } if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) { LOG.info("DAYS_BEFORE_DELETE is negative. Not deleting."); doDelete = false; } if (maxConcurrentMergeJobs == null) { LOG.info("MAX_CONCURRENT_MERGE_JOBS is not set. Using default value of -1."); maxConcurrentMergeJobs = "-1"; } if (maxConcurrentFilterJobs == null) { LOG.info("MAX_CONCURRENT_FILTER_JOBS is not set. Using default value of -1."); maxConcurrentFilterJobs = "-1"; } if (logdir == null) { LOG.info("LOGDRIVER_LOGDIR_NAME is not set. Using default value of 'logs'."); logdir = "logs"; } if (rootDir == null) { LOG.info("SERVICE_ROOT_DIR is not set. Using default value of 'service'."); rootDir = "/service"; } // Now it's safe to create our Oozie Runners.
OozieRunner mergeOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentMergeJobs)); Thread mergeOozieRunnerThread = new Thread(mergeOozieRunner); mergeOozieRunnerThread.setName("OozieRunner - Merge"); mergeOozieRunnerThread.setDaemon(false); mergeOozieRunnerThread.start(); OozieRunner filterOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentFilterJobs)); Thread filterOozieRunnerThread = new Thread(filterOozieRunner); filterOozieRunnerThread.setName("OozieRunner - Filter"); filterOozieRunnerThread.setDaemon(false); filterOozieRunnerThread.start(); // Figure out what date we start filters on. String filterCutoffDate = ""; if (doArchive) { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive)); filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR), (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY)); LOG.info("Archiving logs from before {}", filterCutoffDate); } String deleteCutoffDate = ""; if (doDelete) { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete)); deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR), (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY)); LOG.info("Deleting logs from before {}", deleteCutoffDate); } long now = System.currentTimeMillis(); // Various exceptions have been popping up here. So make sure I catch them // all. try { // We can hang if this fails. So make sure we abort if it fails. FileSystem fs = null; try { fs = FileSystem.get(conf); fs.exists(new Path("/")); // Test if it works. } catch (IOException e) { LOG.error("Error getting filesystem.", e); return 1; } // We'll need an Oozie client to check on orphaned directories. oozieClient = getOozieClient(); // LockUtils are used in a couple of places LockUtil lu = new LockUtil(zkConnectString); // Patterns to recognize hour, day and incoming directories, so that they // can be processed. 
Pattern datePathPattern; Pattern hourPathPattern; Pattern incomingPathPattern; Pattern dataPathPattern; Pattern archivePathPattern; Pattern workingPathPattern; if (hour != null) { datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")"); hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")"); incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/incoming"); dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/data"); archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/archive"); workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(" + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)"); } else if (date != null) { datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")"); hourPathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})"); incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/incoming"); dataPathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/data"); archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/archive"); workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)"); } else { datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})"); hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})"); incomingPathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming"); dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data"); archivePathPattern = Pattern .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive"); workingPathPattern = Pattern .compile(rootDir + "/" + 
Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)"); } // Do a depth first search of the directory, processing anything that // looks // interesting along the way Deque<Path> paths = new ArrayDeque<Path>(); Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/"); paths.push(rootPath); while (paths.size() > 0) { Path p = paths.pop(); LOG.debug("{}", p.toString()); if (!fs.exists(p)) { continue; } FileStatus dirStatus = fs.getFileStatus(p); FileStatus[] children = fs.listStatus(p); boolean addChildren = true; boolean old = dirStatus.getModificationTime() < now - WAIT_TIME; LOG.debug(" Was last modified {}ms ago", now - dirStatus.getModificationTime()); if (!old) { LOG.debug(" Skipping, since it's not old enough."); } else if ((!rootPath.equals(p)) && (children.length == 0 || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) { // old and no children? Delete! LOG.info(" Deleting empty directory {}", p.toString()); fs.delete(p, true); } else { Matcher matcher = datePathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.debug("Checking date directory"); // If this is already done, then skip it. So only process if it // doesn't exist. if (fs.exists(new Path(p, READY_MARKER)) == false) { // Check each subdirectory. If they all have ready markers, then I // guess we're ready. boolean ready = true; for (FileStatus c : children) { if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) { ready = false; break; } } if (ready) { fs.createNewFile(new Path(p, READY_MARKER)); } } } matcher = hourPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.debug("Checking hour directory"); // If this is already done, then skip it. So only process if it // doesn't exist. if (fs.exists(new Path(p, READY_MARKER)) == false) { // Check each subdirectory. If they all have ready markers, then I // guess we're ready. boolean ready = true; for (FileStatus c : children) { if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) { ready = false; break; } } if (ready) { fs.createNewFile(new Path(p, READY_MARKER)); } } } // Check to see if we have to run a merge matcher = incomingPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.debug("Checking incoming directory"); String matchDate = matcher.group(1); String matchHour = matcher.group(2); String matchComponent = matcher.group(3); String timestamp = matchDate + matchHour; if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) { LOG.info("Deleting old directory: {}", p); fs.delete(p, true); addChildren = false; } else if (doMerge) { // old, looks right, and has children? Run it! 
boolean hasMatchingChildren = false; boolean subdirTooYoung = false; for (FileStatus child : children) { if (!hasMatchingChildren) { FileStatus[] grandchildren = fs.listStatus(child.getPath()); for (FileStatus gc : grandchildren) { if (VALID_FILE.matcher(gc.getPath().getName()).matches()) { hasMatchingChildren = true; break; } } } if (!subdirTooYoung) { if (child.getModificationTime() >= now - WAIT_TIME) { subdirTooYoung = true; LOG.debug(" Subdir {} is too young.", child.getPath()); } } } if (!hasMatchingChildren) { LOG.debug(" No files match the expected pattern ({})", VALID_FILE.pattern()); } if (hasMatchingChildren && !subdirTooYoung) { LOG.info(" Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); Properties oozieJobProps = new Properties(); oozieJobProps.load(new FileInputStream(mergeJobPropertiesFile)); oozieJobProps.setProperty("rootDir", rootDir); oozieJobProps.setProperty("dcNumber", dcNumber); oozieJobProps.setProperty("service", service); oozieJobProps.setProperty("date", matchDate); oozieJobProps.setProperty("hour", matchHour); oozieJobProps.setProperty("component", matchComponent); oozieJobProps.setProperty("user.name", userName); oozieJobProps.setProperty("logdir", logdir); mergeOozieRunner.submit(oozieJobProps); addChildren = false; } } } // Check to see if we need to run a filter and archive matcher = dataPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { String matchDate = matcher.group(1); String matchHour = matcher.group(2); String matchComponent = matcher.group(3); String timestamp = matchDate + matchHour; if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) { LOG.info("Deleting old directory: {}", p); fs.delete(p, true); addChildren = false; } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) { Properties oozieJobProps = new Properties(); oozieJobProps.load(new FileInputStream(filterJobPropertiesFile)); oozieJobProps.setProperty("rootDir", rootDir); oozieJobProps.setProperty("dcNumber", dcNumber); oozieJobProps.setProperty("service", service); oozieJobProps.setProperty("date", matchDate); oozieJobProps.setProperty("hour", matchHour); oozieJobProps.setProperty("component", matchComponent); oozieJobProps.setProperty("user.name", userName); oozieJobProps.setProperty("logdir", logdir); // Check to see if we should just keep all or delete all here. // The filter file should be here String appPath = oozieJobProps.getProperty("oozie.wf.application.path"); appPath = appPath.replaceFirst("\\$\\{.*?\\}", ""); Path filterFile = new Path(appPath + "/" + service + ".yaml"); LOG.info("Filter file is {}", filterFile); if (fs.exists(filterFile)) { List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent, fs.open(filterFile)); if (filters == null) { LOG.warn( " Got null when getting filters. Not processing. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); } else if (filters.size() == 0) { LOG.warn(" Got no filters. Not processing. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); } else if (filters.size() == 1 && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) { LOG.info(" Keeping everything. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); // Move files from data to archive // delete it all! 
String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent + "/archive/"; String[] moveArgs = { zkConnectString, dcNumber, service, matchDate, matchHour, matchComponent, "move " + p.toUri().getPath() + " " + destination }; ToolRunner.run(new Configuration(), new LockedFs(), moveArgs); } else if (filters.size() == 1 && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) { LOG.info(" Dropping everything. {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); // delete it all! String[] delArgs = { zkConnectString, dcNumber, service, matchDate, matchHour, matchComponent, "delete " + p.toUri().getPath() }; ToolRunner.run(new Configuration(), new LockedFs(), delArgs); } else { LOG.info(" Run Filter/Archive job {} :: {} {} {} {} {}", new Object[] { p.toString(), dcNumber, service, matchDate, matchHour, matchComponent }); filterOozieRunner.submit(oozieJobProps); } } else { LOG.warn("Skipping filter job, since no filter file exists"); } addChildren = false; } } matcher = archivePathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { String matchDate = matcher.group(1); String matchHour = matcher.group(2); String timestamp = matchDate + matchHour; if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) { LOG.info("Deleting old directory: {}", p); fs.delete(p, true); addChildren = false; } } matcher = workingPathPattern.matcher(p.toUri().getPath()); if (matcher.matches()) { LOG.info(" Matches working pattern"); if (resetOrphanedJobs) { String matchDate = matcher.group(1); String matchHour = matcher.group(2); String matchComponent = matcher.group(3); String matchOozieJobId = matcher.group(4); // Check to see what's up with the oozie job. If it's still // running, // we don't want to touch it. Status status = null; try { WorkflowJob jobInfo = oozieClient.getJobInfo(matchOozieJobId); status = jobInfo.getStatus(); } catch (OozieClientException e) { if (e.getMessage() != null && e.getMessage().contains("Job does not exist")) { LOG.info("Oozie job not found. Proceeding as though job was failed.", e); status = Status.FAILED; } else { LOG.error("Oozie client error. 
Not Proceeding.", e); } } LOG.info(" Oozie job status is {}", status); if (status != null && status != Status.RUNNING && status != Status.PREP && status != Status.SUSPENDED) { // Move everything from working/xxx/incoming/ to incoming/ PathInfo lockPathInfo = new PathInfo(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent); lu.acquireWriteLock(lu.getLockPath(lockPathInfo)); FileStatus[] fileStatuses = fs .listStatus(new Path(p.toUri().getPath() + "/incoming/")); if (fileStatuses != null) { for (FileStatus fileStatus : fileStatuses) { Path toPath = new Path(fileStatus.getPath().getParent().getParent() .getParent().getParent(), "incoming/" + fileStatus.getPath().getName()); LOG.info(" Moving data from {} to {}", fileStatus.getPath(), toPath); LOG.info(" mkdir {}", toPath); fs.mkdirs(toPath); Path fromDir = new Path(p.toUri().getPath(), "incoming/" + fileStatus.getPath().getName()); LOG.info(" moving from {}", fromDir); FileStatus[] files = fs.listStatus(fromDir); if (files == null || files.length == 0) { LOG.info(" Nothing to move from {}", fromDir); } else { for (FileStatus f : files) { LOG.info(" rename {} {}", f.getPath(), new Path(toPath, f.getPath().getName())); fs.rename(f.getPath(), new Path(toPath, f.getPath().getName())); } } LOG.info(" rm {}", fileStatus.getPath().getParent().getParent()); fs.delete(fileStatus.getPath().getParent().getParent(), true); } lu.releaseWriteLock(lu.getLockPath(lockPathInfo)); } } } addChildren = false; } } // Add any children which are directories to the stack. if (addChildren) { for (int i = children.length - 1; i >= 0; i--) { FileStatus child = children[i]; if (child.isDir()) { paths.push(child.getPath()); } } } } // Since we may have deleted a bunch of directories, delete any unused // locks // from ZooKeeper. { LOG.info("Checking for unused locks in ZooKeeper"); String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir; if (date != null) { scanPath += "/" + date; if (hour != null) { scanPath += "/" + hour; } } List<LockInfo> lockInfo = lu.scan(scanPath); for (LockInfo li : lockInfo) { // Check if the lock path still exists in HDFS. If it doesn't, then // delete it from ZooKeeper. String path = li.getPath(); String hdfsPath = path.substring(LockUtil.ROOT.length()); if (!fs.exists(new Path(hdfsPath))) { ZooKeeper zk = lu.getZkClient(); while (!path.equals(LockUtil.ROOT)) { try { zk.delete(path, -1); } catch (KeeperException.NotEmptyException e) { // That's fine. just stop trying then. break; } catch (Exception e) { LOG.error("Caught exception trying to delete from ZooKeeper.", e); break; } LOG.info("Deleted from ZooKeeper: {}", path); path = path.substring(0, path.lastIndexOf('/')); } } } } lu.close(); // Now that we're done, wait for the Oozie Runner to stop, and print the // results. 
LOG.info("Waiting for Oozie jobs to complete."); mergeOozieRunner.shutdown(); mergeOozieRunnerThread.join(); LOG.info("Oozie Job Stats : Merge : Started={} Succeeded={} failed={} errors={}", new Object[] { mergeOozieRunner.getStarted(), mergeOozieRunner.getSucceeded(), mergeOozieRunner.getFailed(), mergeOozieRunner.getErrors() }); filterOozieRunner.shutdown(); filterOozieRunnerThread.join(); LOG.info("Oozie Job Stats : Filter : Started={} Succeeded={} failed={} errors={}", new Object[] { filterOozieRunner.getStarted(), filterOozieRunner.getSucceeded(), filterOozieRunner.getFailed(), filterOozieRunner.getErrors() }); } catch (Exception e) { LOG.error("Unexpected exception caught.", e); return 1; } return 0; }
From source file:com.rockstor.compact.Compactor.java
License:Apache License
public void compactData(String taskIdName) throws IOException, NoSuchAlgorithmException { Path dstDir = new Path(pathUtil.getSpecTaskDir(taskIdName)); FileSystem dfs = RockAccessor.getFileSystem(); if (!dfs.exists(dstDir)) { LOG.error("[COMPACTOR]: Directory " + dstDir + " does not exist"); return; } String metaFileName = pathUtil.getTaskMetaPath(taskIdName); if (!dfs.exists(new Path(metaFileName))) { LOG.error("[COMPACTOR]: meta file " + metaFileName + " does not exist"); return; } // compact data // 1. create rock data file String rockIdStr = null; // 2. create rock index file // 3. load meta file TaskMetaReader rocksMeta = new TaskMetaReader(); rocksMeta.open(metaFileName); Map<String, byte[]> rocks = rocksMeta.getRocks(); rocksMeta.close(); // 4. compact rock files one by one /* * for(rock:rocks){ load rock gb from db; load rock gb from delete file * sort gb by offset copy chunks to new data file, and write new index, * if offset is in gb set, drop and continue } */ Map<Long, Long> gbIndexes = null; RockIndexReader rockIndexReader = null; RockReader rockReader = null; Chunk chunk = null; // create rock writer RockCompactWriter rockWriter = new RockCompactWriter(); rockWriter.create(taskIdName); rockIdStr = rockWriter.getRockID(); String dataFileName = pathUtil.getTaskDataPath(taskIdName, rockIdStr); String gbIndexPath = null; long pos = 0; Long size = null; for (Entry<String, byte[]> entry : rocks.entrySet()) { LOG.info("compacting rock :" + entry.getKey()); } for (Entry<String, byte[]> entry : rocks.entrySet()) { gbIndexes = RockDB.getGarbages(entry.getValue()); rockIndexReader = new RockIndexReader(); LOG.debug("get " + gbIndexes.size() + " invalid chunks of rock " + entry.getKey() + " from chunk DB"); gbIndexPath = pathUtil.getGbMetaPath(entry.getKey()); if (dfs.exists(new Path(gbIndexPath))) { rockIndexReader.open(gbIndexPath); // merge gb data index while (rockIndexReader.hasNext()) { chunk = rockIndexReader.next(); LOG.debug("ignore list append chunk: " + chunk); gbIndexes.put(chunk.getOffset(), chunk.getSize() + Chunk.HEADER_LEN); } rockIndexReader.close(); } // copy chunks and write new index rockReader = RockReaderPool.getInstance().get(entry.getKey()); FSDataInputStream input = rockReader.getFSDataInputStream(); int padding_bytes = 0; while (rockReader.hasNext()) { pos = rockReader.getPos(); padding_bytes = (int) (pos & 7); if (padding_bytes != 0) { pos = pos + 8 - padding_bytes; } // LOG.info("pos now: "+pos); size = gbIndexes.get(pos); // ignore deleted chunk if (size != null) { LOG.debug("ignore chunk at " + pos + ", size: " + size); rockReader.seekg(pos + size); continue; } chunk = rockReader.nextChunk(); if (chunk == null) { LOG.error("[Compactor] read source chunk from " + entry.getKey() + ":" + pos + " Failed"); throw new IOException( "[Compactor] read source chunk from " + entry.getKey() + ":" + pos + " Failed"); } rockWriter.addChunk(chunk, input); } } rockWriter.close(); // 5. rename ${compactorDir}/rockId.dat ==> $(rock_data_dir)/rockId dfs.rename(new Path(dataFileName), new Path(Rock.HADOOP_DATA_HOME + "/" + rockIdStr)); // 6. remove invalid chunks removeInvalidChunks(taskIdName); // 7. sync left chunks syncLeftChunks(taskIdName); // 8. remove task dir dfs.delete(dstDir, true); }
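compactData promotes the rewritten rock file with rename and only then deletes the task directory; the ordering matters, since deleting first would lose the data if the process died in between. A sketch of that promote-then-clean step with placeholder names:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PromoteThenClean {
    public static void promote(FileSystem fs, Path taskDir, Path tmpData, Path finalData) throws IOException {
        // 1. publish the result first
        if (!fs.rename(tmpData, finalData)) {
            throw new IOException("rename " + tmpData + " -> " + finalData + " failed");
        }
        // 2. only now is it safe to throw away the whole task working directory
        fs.delete(taskDir, true);
    }
}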
From source file:com.rockstor.tools.RockStorFsFormat.java
License:Apache License
protected void cleanDfs() throws IOException { RockAccessor.connectHDFS(); String rootDir = conf.get("rockstor.rootdir"); LOG.info("connect to hdfs ok!"); FileSystem dfs = RockAccessor.getFileSystem(); dfs.delete(new Path(rootDir), true); LOG.info("remove rockstor root dir " + rootDir + " OK!"); RockAccessor.disconnectHDFS(); LOG.info("disconnect from hdfs ok!"); }
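cleanDfs simply deletes the whole rockstor root. On most FileSystem implementations, delete on a missing path returns false rather than throwing, so a reset like this is effectively idempotent; a sketch with illustrative names:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ResetRoot {
    public static void reset(Configuration conf, String rootDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        boolean removed = fs.delete(new Path(rootDir), true);
        // removed == false typically just means the directory was not there to begin with
        System.out.println(removed ? "removed " + rootDir : rootDir + " did not exist");
    }
}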
From source file:com.savy3.nonequijoin.MapOutputSampler.java
License:Apache License
/** * Driver for InputSampler MapReduce Job */ public static void runMap(Job job, Path sampleInputPath) throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException { LOG.info("Running a MapReduce Job on Sample Input File" + sampleInputPath.toString()); Configuration conf = new Configuration(); conf.setBoolean("mapreduce.job.ubertask.enable", true); conf.set("numSamples", "" + (job.getNumReduceTasks() - 1)); Job sampleJob = new Job(conf); sampleJob.setMapperClass(job.getMapperClass()); sampleJob.setReducerClass(SampleKeyReducer.class); sampleJob.setJarByClass(job.getMapperClass()); sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass()); sampleJob.setMapOutputValueClass(job.getMapOutputValueClass()); sampleJob.setOutputKeyClass(job.getMapOutputKeyClass()); sampleJob.setOutputValueClass(NullWritable.class); sampleJob.setInputFormatClass(SequenceFileInputFormat.class); sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath); FileSystem fs = FileSystem.get(conf); Path out = new Path(sampleInputPath.getParent(), "mapOut"); fs.delete(out, true); SequenceFileOutputFormat.setOutputPath(sampleJob, out); sampleJob.waitForCompletion(true); LOG.info("Sample MapReduce Job Output File" + out.toString()); Path partFile = new Path(out, "part-r-00000"); Path tmpFile = new Path("/_tmp"); fs.delete(tmpFile, true); fs.rename(partFile, tmpFile); fs.delete(sampleInputPath.getParent(), true); fs.rename(new Path("/_tmp"), sampleInputPath.getParent()); LOG.info("Sample partitioning file copied to location " + sampleInputPath.getParent().toString()); }
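runMap replaces the job's input directory with its own output through a delete/rename shuffle via a temporary path. The essence of that swap, with placeholder paths (the real code hard-codes /_tmp):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SwapViaTmp {
    /** Replace 'target' with 'replacement', staging through 'tmp' (assumed otherwise unused). */
    public static void swap(FileSystem fs, Path target, Path replacement, Path tmp) throws IOException {
        fs.delete(tmp, true);        // make sure the staging path is free
        fs.rename(replacement, tmp); // park the new data
        fs.delete(target, true);     // drop the old tree
        fs.rename(tmp, target);      // move the new data into place
    }
}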
From source file:com.scaleunlimited.cascading.hadoop.HadoopUtils.java
License:Apache License
public static void safeRemove(FileSystem fs, Path path) { if ((fs != null) && (path != null)) { try { fs.delete(path, true); } catch (Throwable t) { // Ignore } } }