Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem#delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file or directory. When f names a directory, pass recursive = true to remove the directory together with its contents.
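
Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: obtain a FileSystem from a Configuration, check exists(), and call delete() with recursive set to true to remove a directory tree. The path /tmp/example-output is a placeholder, not a path taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder path; replace with the directory you actually want to remove.
        Path out = new Path("/tmp/example-output");

        // recursive = true removes the directory and everything beneath it;
        // delete() returns false if the path does not exist.
        if (fs.exists(out) && fs.delete(out, true)) {
            System.out.println("Deleted " + out);
        }
    }
}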

Usage

From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java

/**
 * @param args
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    int number_of_classes = 1;
    int number_of_features = 1;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);

    conf = job.getConfiguration(); // Job copies the Configuration it was given, so later setInt() calls must target the job's own copy.

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);
    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java

@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/naive_bayes_continuous.txt";
    Path path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        br.write("class_id,     mu(mean),     std");
        br.write("-------------------------------\n");
        for (int i = 0; i < number_of_classes; i++) {
            br.write("-------- Class-" + i + "-------\n");
            for (int j = 0; j < number_of_features; j++) {
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_mu")))
                        + ",  ");
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_std")))
                        + "\n");
            }
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("Failed to write " + uri + ": " + e);
    }

}

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapJob", outputFile, TestMapper.class, null, // reducer class
            null, // start key
            null, // limit key
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball", "gmail.com\tJohn Doe",
            "usermail.example.com\tChristophe Bisciglia", "usermail.example.com\tKiyan Ahmadizadeh",
            "gmail.com\tJane Doe", "usermail.example.com\tGarrett Wu");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
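
The NOTE above (repeated in the following examples) refers to HADOOP-7973: FileSystem.get() returns an instance from a process-wide cache, so calling close() on it can invalidate the filesystem other threads are still using. One common workaround, sketched here under the assumption that an uncached instance is acceptable for the test and reusing the job and outputFile variables from the example above, is FileSystem.newInstance(), which returns a private instance that is safe to close:

// Sketch only: obtain an uncached FileSystem instead of the shared, cached
// instance returned by FileSystem.get(), so it can be closed without side effects.
FileSystem privateFs = FileSystem.newInstance(job.getConfiguration());
try {
    privateFs.delete(outputFile.getParent(), true);
} finally {
    privateFs.close(); // closes only this private instance
}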

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
    final Path outputFile = createOutputFile();
    // Set the same entity IDs for start and limit, and we should get just the start row
    final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
    final byte[] endRowKey = startEntityId.getHBaseRowKey();
    final EntityId rawLimitEntityId = HBaseEntityId
            .fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

    // Create a test job.
    final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class, null, // reducer class
            startEntityId, rawLimitEntityId, null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
    final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
            new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class, null, // reducer class
            null, // start key
            null, // limit key
            filter);

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

/** Test FijiTableInputFormat in a MapReduce job. */
@Test
public void testMapReduceJob() throws Exception {
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class, null, // start key
            null, // limit key
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder();
    for (String line : output) {
        final String[] keyValue = line.split("\t");
        final String emailDomain = keyValue[0];
        final Set<String> names = Sets.newHashSet(keyValue[1].split(","));

        builder.put(emailDomain, names);
    }
    final Map<String, Set<String>> actual = builder.build();
    final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder()
            .put("usermail.example.com",
                    Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia", "Kiyan Ahmadizadeh", "Garrett Wu"))
            .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe")).build();
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

public static void cleanupOnFailureImpl(String location, Job job) throws IOException {
    Path path = new Path(location);
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}

From source file:com.mozilla.hadoop.Backup.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        return printUsage();
    }

    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;

        FileSystem hdfs = null;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            hdfs.delete(new Path(NAME + "-inputsource*.txt"), false);
        } finally {
            checkAndClose(hdfs);
        }
    }

    return rc;
}
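
Note that delete() takes a literal path and does not expand the * wildcard, so the call above removes at most a file whose name is literally NAME + "-inputsource*.txt", asterisk included. A hedged sketch of the usual glob-then-delete pattern, reusing the hdfs handle and NAME prefix from the example (FileStatus is org.apache.hadoop.fs.FileStatus):

// Sketch: expand the glob with globStatus(), then delete each match individually.
FileStatus[] matches = hdfs.globStatus(new Path(NAME + "-inputsource*.txt"));
if (matches != null) {
    for (FileStatus match : matches) {
        hdfs.delete(match.getPath(), false);
    }
}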

From source file:com.mozilla.hadoop.UnknownPathFinder.java

License:Apache License

/**
 * Deletes all of the specified paths recursively.
 * @param conf Hadoop configuration used to obtain the FileSystem
 * @param paths the paths to delete
 * @return true if every path was deleted successfully
 * @throws IOException
 */
public static boolean deleteFilesystemPaths(Configuration conf, Collection<String> paths) throws IOException {
    boolean success = true;

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
        for (String s : paths) {
            Path p = new Path(s);
            if (!hdfs.delete(p, true)) {
                LOG.info("Failed to delete: " + s);
                success = false;
                break;
            } else {
                LOG.info("Successfully deleted: " + s);
            }
        }
    } finally {
        if (hdfs != null) {
            hdfs.close();
        }
    }

    return success;
}

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    state = new State<T>();

    Path baseDirectory = new Path(basePath);
    hadoopConf = HadoopFileSystem.getHadoopConfiguration();
    FileSystem fs = baseDirectory.getFileSystem(hadoopConf);
    refTruncate = reflectTruncate(fs);

    long currentProcessingTime = ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();

    checkForInactiveBuckets(currentProcessingTime);

    ((StreamingRuntimeContext) getRuntimeContext())
            .registerTimer(currentProcessingTime + inactiveBucketCheckInterval, this);

    this.clock = new Clock() {
        @Override
        public long currentTimeMillis() {
            return ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();
        }
    };

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(baseDirectory) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(baseDirectory, true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}