List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
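Every example below uses the two-argument delete(Path, boolean): when the flag is true the path is removed recursively, so a non-empty directory and its contents go in one call; with false, deleting a non-empty directory throws an IOException. As a minimal sketch of the common "remove stale job output before writing" pattern seen in the examples (the class name, the output path "out", and the plain default Configuration are illustrative assumptions, not taken from any one source file):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOutputSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolve the default filesystem configured by fs.defaultFS (HDFS, local, S3, ...).
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("out"); // hypothetical job output directory
        // recursive=true deletes the directory together with everything beneath it;
        // delete typically returns false when the path does not exist, so guard with exists().
        if (fs.exists(out) && fs.delete(out, true)) {
            System.out.println("Removed stale output directory: " + out);
        }
    }
}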
From source file: byte_import.HexastoreBulkImport.java
License: Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);
    } catch (IOException e2) {
        e2.printStackTrace();
    }
    return job;
}
From source file: ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java
License: Open Source License

public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.ConsineMain <input> <output>");
        System.exit(2);
    }
    Job job1 = new Job(conf, "ConsineMain");
    job1.setJarByClass(CosineMain.class);
    job1.setMapperClass(AggregateReadingsMapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(DoubleWritable.class);
    job1.setReducerClass(AggregateReadingsReducer.class);
    job1.setOutputKeyClass(LongWritable.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.setInputDirRecursive(job1, true);
    FileInputFormat.setInputPaths(job1, new Path(otherArgs[0]));
    int lastIdx = otherArgs[0].lastIndexOf("/");
    String tempOutput = otherArgs[0].substring(0, lastIdx) + "/temp";
    FileOutputFormat.setOutputPath(job1, new Path(tempOutput));
    System.out.println("\nStarting Job-1 ...");
    final long startTime = System.currentTimeMillis();
    try {
        final long startTimeJob1 = System.currentTimeMillis();
        if (!job1.waitForCompletion(true)) {
            System.out.println("Job-1 failed.");
        } else {
            System.out.println("Duration of Job1 " + ((System.currentTimeMillis() - startTimeJob1) / 1000.0) + " seconds.");
            final Job job2 = new Job(conf, "ConsineMain Aggregate");
            job2.setJarByClass(CosineMain.class);
            job2.setInputFormatClass(CartesianInputFormat.class);
            CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, tempOutput);
            CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, tempOutput);
            FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));
            job2.setMapperClass(CartesianProductMapper.class);
            job2.setMapOutputKeyClass(DoubleWritable.class);
            job2.setMapOutputValueClass(Text.class);
            job2.setSortComparatorClass(DescendingKeyComparator.class);
            job2.setReducerClass(CartesianProductReducer.class);
            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(DoubleWritable.class);
            job2.setNumReduceTasks(10);
            final long startTimeJob2 = System.currentTimeMillis();
            System.out.println("\nStarting Job-2 ...");
            if (!job2.waitForCompletion(true)) {
                System.out.println("Job-2 failed.");
            } else {
                System.out.println("Duration of Job2: " + ((System.currentTimeMillis() - startTimeJob2) / 1000.0) + " seconds.");
            }
        }
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(tempOutput), true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Total Duration: " + duration + " seconds.");
    }
    return 0;
}
From source file: cascading.flow.hadoop.MapReduceFlowPlatformTest.java
License: Open Source License

private String remove(String path, boolean delete) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(path), HadoopPlanner.createJobConf(getProperties()));
    if (delete)
        fs.delete(new Path(path), true);
    return path;
}
From source file: cascading.flow.MapReduceFlowTest.java
License: Open Source License

private String remove(String path, boolean delete) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(path), MultiMapReducePlanner.getJobConf(getProperties()));
    if (delete)
        fs.delete(new Path(path), true);
    return path;
}
From source file: cascading.tap.hadoop.Hadoop18TapUtil.java
License: Open Source License

/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf
 */
public static void commitTask(JobConf conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));
    FileSystem fs = getFSSafe(conf, taskOutputPath);
    if (fs == null)
        return;
    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());
    if (integer.decrementAndGet() != 0)
        return;
    String taskId = conf.get("mapred.task.id");
    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);
    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;
        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);
            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - " + taskOutputPath);
            LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
        }
    }
}
From source file: cascading.tap.hadoop.Hadoop18TapUtil.java
License: Open Source License

private static synchronized void cleanTempPath(JobConf conf, Path outputPath) throws IOException {
    // do the clean up of temporary directory
    if (outputPath != null) {
        FileSystem fileSys = getFSSafe(conf, outputPath);
        if (fileSys == null)
            return;
        if (!fileSys.exists(outputPath))
            return;
        Path tmpDir = new Path(outputPath, TEMPORARY_PATH);
        LOG.info("deleting temp path " + tmpDir);
        if (fileSys.exists(tmpDir))
            fileSys.delete(tmpDir, true);
    }
}
From source file: cascading.tap.hadoop.Hadoop18TapUtil.java
License: Open Source License

private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput) throws IOException {
    String taskId = conf.get("mapred.task.id");
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
From source file: cascading.tap.hadoop.Hfs.java
License: Open Source License

private boolean deleteFullIdentifier(Configuration conf, String fullIdentifier) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("deleting: {}", fullIdentifier);
    Path fullPath = new Path(fullIdentifier);
    // do not delete the root directory
    if (fullPath.depth() == 0)
        return true;
    FileSystem fileSystem = getFileSystem(conf);
    try {
        return fileSystem.delete(fullPath, true);
    } catch (NullPointerException exception) {
        // hack to get around npe thrown when fs reaches root directory
        // removes coupling to the new aws hadoop artifacts that may not be deployed
        if (!(fileSystem.getClass().getSimpleName().equals("NativeS3FileSystem")))
            throw exception;
    }
    return true;
}
From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java
License: Open Source License

/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf
 */
public static void commitTask(Configuration conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));
    FileSystem fs = getFSSafe(conf, taskOutputPath);
    if (fs == null)
        return;
    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());
    if (integer.decrementAndGet() != 0)
        return;
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));
    LOG.info("committing task: '{}' - {}", taskId, taskOutputPath);
    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;
        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);
            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '{}' - {}", taskId, taskOutputPath);
            LOG.info("saved output of task '{}' to {}", taskId, jobOutputPath);
        }
    }
}
From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java
License: Open Source License

private static synchronized void cleanTempPath(Configuration conf, Path outputPath) throws IOException {
    // do the clean up of temporary directory
    if (outputPath != null) {
        FileSystem fileSys = getFSSafe(conf, outputPath);
        if (fileSys == null)
            return;
        if (!fileSys.exists(outputPath))
            return;
        Path tmpDir = new Path(outputPath, TEMPORARY_PATH);
        LOG.info("deleting temp path {}", tmpDir);
        if (fileSys.exists(tmpDir))
            fileSys.delete(tmpDir, true);
    }
}