List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
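For orientation before the examples below, here is a minimal sketch of calling delete directly; the configuration defaults and the target path are assumptions for illustration, not taken from any of the source files listed here:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical path; replace with a real path on your cluster.
        Path target = new Path("/tmp/example-output");
        // Guard with exists() so a missing path is not treated as a failure.
        if (fs.exists(target)) {
            // recursive = true is required if the path is a non-empty directory.
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted: " + deleted);
        }
    }
}

Most of the examples below follow this same exists-then-delete pattern, typically to clear a job's output directory before running it.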
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java
/**
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    int number_of_classes = 1;
    int number_of_features = 1;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);
    conf = job.getConfiguration(); // This line is mandatory.

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true); // remove any previous output directory
    FileOutputFormat.setOutputPath(job, out);

    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java
@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/naive_bayes_continuous.txt";
    Path path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true); // replace any previous result file

        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        br.write("class_id, mu(mean), std\n");
        br.write("-------------------------------\n");
        for (int i = 0; i < number_of_classes; i++) {
            br.write("-------- Class-" + i + "-------\n");
            for (int j = 0; j < number_of_features; j++) {
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_mu"))) + ", ");
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_std"))) + "\n");
            }
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File /user/hduser/naive_bayes_continuous.txt cannot be found");
    }
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapJob", outputFile, TestMapper.class,
        null, // reducer class
        null, // start key
        null, // limit key
        null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet(
        "usermail.example.com\tAaron Kimball",
        "gmail.com\tJohn Doe",
        "usermail.example.com\tChristophe Bisciglia",
        "usermail.example.com\tKiyan Ahmadizadeh",
        "gmail.com\tJane Doe",
        "usermail.example.com\tGarrett Wu");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
    final Path outputFile = createOutputFile();

    // Set the same entity IDs for start and limit, and we should get just the start row.
    final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
    final byte[] endRowKey = startEntityId.getHBaseRowKey();
    final EntityId rawLimitEntityId =
        HBaseEntityId.fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

    // Create a test job.
    final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class,
        null, // reducer class
        startEntityId,
        rawLimitEntityId,
        null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
    final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
        new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class,
        null, // reducer class
        null, // start key
        null, // limit key
        filter);

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a MapReduce job. */
@Test
public void testMapReduceJob() throws Exception {
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class,
        null, // start key
        null, // limit key
        null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));

    final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder();
    for (String line : output) {
        final String[] keyValue = line.split("\t");
        final String emailDomain = keyValue[0];
        final Set<String> names = Sets.newHashSet(keyValue[1].split(","));
        builder.put(emailDomain, names);
    }
    final Map<String, Set<String>> actual = builder.build();
    final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder()
        .put("usermail.example.com", Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia",
            "Kiyan Ahmadizadeh", "Garrett Wu"))
        .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe"))
        .build();
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
public static void cleanupOnFailureImpl(String location, Job job) throws IOException {
    Path path = new Path(location);
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        return printUsage();
    }

    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            hdfs.delete(new Path(NAME + "-inputsource*.txt"), false);
        } finally {
            checkAndClose(hdfs);
        }
    }

    return rc;
}
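Note that FileSystem.delete() does not expand glob patterns, so the wildcard path passed above only matches a file whose name literally contains '*'. A minimal sketch of removing all matching files instead, reusing the snippet's NAME constant and job variable and expanding the pattern with globStatus() first:

FileSystem hdfs = FileSystem.get(job.getConfiguration());
// globStatus() resolves the wildcard into concrete paths; delete each match.
FileStatus[] matches = hdfs.globStatus(new Path(NAME + "-inputsource*.txt"));
if (matches != null) {
    for (FileStatus status : matches) {
        hdfs.delete(status.getPath(), false);
    }
}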
From source file:com.mozilla.hadoop.UnknownPathFinder.java
License:Apache License
/**
 * Deletes all of the paths specified.
 * @param conf
 * @param paths
 * @return true if every path was deleted, false otherwise
 * @throws IOException
 */
public static boolean deleteFilesystemPaths(Configuration conf, Collection<String> paths) throws IOException {
    boolean success = true;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
        for (String s : paths) {
            Path p = new Path(s);
            if (!hdfs.delete(p, true)) {
                LOG.info("Failed to delete: " + s);
                success = false;
                break;
            } else {
                LOG.info("Successfully deleted: " + s);
            }
        }
    } finally {
        if (hdfs != null) {
            hdfs.close();
        }
    }

    return success;
}
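Because FileSystem.get() returns a cached instance that may be shared across the JVM, closing it in the finally block can affect other users of the same filesystem (the HADOOP-7973 note in the examples above describes the same problem). A minimal sketch of an alternative, using FileSystem.newInstance() so that close() only tears down a private instance; the variable names mirror the snippet above:

FileSystem hdfs = FileSystem.newInstance(conf); // private instance, safe to close
try {
    for (String s : paths) {
        hdfs.delete(new Path(s), true);
    }
} finally {
    hdfs.close();
}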
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    state = new State<T>();

    Path baseDirectory = new Path(basePath);
    hadoopConf = HadoopFileSystem.getHadoopConfiguration();
    FileSystem fs = baseDirectory.getFileSystem(hadoopConf);
    refTruncate = reflectTruncate(fs);

    long currentProcessingTime = ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();
    checkForInactiveBuckets(currentProcessingTime);

    ((StreamingRuntimeContext) getRuntimeContext())
        .registerTimer(currentProcessingTime + inactiveBucketCheckInterval, this);

    this.clock = new Clock() {
        @Override
        public long currentTimeMillis() {
            return ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();
        }
    };

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(baseDirectory) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(baseDirectory, true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}