Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#delete, drawn from open-source projects.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file or directory. If f is a directory and recursive is true, the directory and all of its contents are deleted; if recursive is false, deleting a non-empty directory throws an IOException (for a plain file the flag is irrelevant). Returns true if the delete succeeded and false otherwise, for example when the path does not exist.
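
Before the project examples, here is a minimal self-contained sketch of the call (the class name and path are illustrative, not taken from the examples below): check that the path exists, delete it recursively, and inspect the boolean result.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path target = new Path("/tmp/example-output"); // illustrative path
        // Recursive delete: removes the directory and everything beneath it.
        if (fs.exists(target) && !fs.delete(target, true)) {
            System.err.println("Failed to delete " + target);
        }
    }
}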

Usage

From source file: com.cloudera.recordservice.examples.mapreduce.WordCount.java

License: Apache License

public void run(String[] args) throws Exception {
    boolean useRecordService = true;
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    } else if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);//w  w  w .jav a 2s .  c o  m
    }
    String input = args[0].trim();
    String output = args[1];

    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    if (useRecordService) {
        conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
        RecordServiceConfig.setInput(conf, input);
    } else {
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(input));
    }

    FileSystem fs = FileSystem.get(conf);
    Path outputPath = new Path(output);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    System.out.println("Done");
}
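
A note on the pattern above: the output path is removed before the job runs because Hadoop's FileOutputFormat fails the job if the output directory already exists. Deleting it up front makes reruns idempotent; a stricter variant would also check the boolean returned by fs.delete rather than assuming success.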

From source file: com.cloudera.science.quince.FileUtils.java

License: Open Source License

public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        return;
    }
    for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) {
        for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) {
            for (FileStatus sampleGroupStatus : fs.listStatus(posStatus.getPath(),
                    new PartitionPathFilter("sample_group", sampleGroup))) {
                fs.delete(sampleGroupStatus.getPath(), true);
            }
        }
    }
}
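
In this example the recursive flag is essential: each matched sample_group partition is a directory, and delete(path, true) removes the directory together with all files beneath it in a single call.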

From source file: com.cloudera.sparkwordcount.ipWordCount.java

License: Apache License

public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().set("spark.dynamicAllocation.initialExecutors", "5").setAppName("Spark Count"));
    // sc.addJar("");
    //   final Logger logger = Logger.getLogger("org");
    // logger.setLevel(Level.INFO);
    final int threshold = Integer.parseInt(args[1]);
    JavaRDD<String> stringJavaRDD = sc.textFile(args[0]);
    JavaRDD<String> filteredRDD = stringJavaRDD.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String value) throws Exception {
            if (value.contains("TIME_STAMP")) {
                return false;
            }
            RdrRaw line = RdrParser.parseRdr(value);
            if (line == null) {
                System.out.println("can't pars rdr");
                return false;
            }
            String url = line.dstHost;
            if (url.trim().isEmpty()) {
                return false;
            }
            //System.out.println(url);
            return true;
        }
    });
    JavaPairRDD<RdrRaw, Integer> countsIp = filteredRDD.mapToPair(new PairFunction<String, RdrRaw, Integer>() {
        @Override
        public Tuple2<RdrRaw, Integer> call(String s) throws Exception {
            RdrRaw rdrRaw = RdrParser.parseRdr(s);
            return new Tuple2<RdrRaw, Integer>(rdrRaw, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
            return i1 + i2;
        }
    });

    // keep only records with more than threshold occurrences
    JavaPairRDD<RdrRaw, Integer> filtered = countsIp.filter(new Function<Tuple2<RdrRaw, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<RdrRaw, Integer> rdrRawIntegerTuple2) throws Exception {
            return rdrRawIntegerTuple2._2() > threshold;
        }
    });
    JavaPairRDD<Integer, RdrRaw> finalPair = filtered
            .mapToPair(new PairFunction<Tuple2<RdrRaw, Integer>, Integer, RdrRaw>() {
                @Override
                public Tuple2<Integer, RdrRaw> call(Tuple2<RdrRaw, Integer> item) throws Exception {
                    return item.swap();
                }
            }).sortByKey(false);
    List<Tuple2<Integer, RdrRaw>> collect = finalPair.take(10);
    StringBuilder msgBody = new StringBuilder();
    for (Tuple2<Integer, RdrRaw> rdrInTuple2 : collect) {
        RdrRaw rdrRaw = rdrInTuple2._2();
        Integer count = rdrInTuple2._1();
        msgBody.append(rdrRaw.dstHost)
                // .append(rdrRaw.dstParam)
                .append(" found [").append(count).append("]\n");
    }
    Configuration conf = new Configuration();
    try {
        Path p = new Path(args[2]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        // Write the top-10 summary to HDFS, replacing any previous run's output.
        FSDataOutputStream out = fs.create(p);
        ByteArrayInputStream in = new ByteArrayInputStream(msgBody.toString().getBytes());
        byte[] buffer = new byte[256];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }
        out.close(); // close the stream so the file is flushed to HDFS
        p = new Path(args[2] + "_all");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        finalPair.saveAsTextFile(args[2] + "_all");
    } catch (IOException e) {
        e.printStackTrace();
    }

    sc.stop();
    // (commented-out experimental code omitted: an email notification with
    // hard-coded SMTP credentials and an earlier word/character-count prototype)

}

From source file: com.cloudera.sqoop.hive.TestHiveImport.java

License: Apache License

/** Test that we can generate a file containing the DDL and not import. */
@Test
public void testGenerateOnly() throws IOException {
    final String TABLE_NAME = "GenerateOnly";
    setCurTableName(TABLE_NAME);
    setNumCols(1);

    // Figure out where our target generated .q file is going to be.
    SqoopOptions options = getSqoopOptions(getArgv(false, null), new ImportTool());
    Path ddlFile = new Path(new Path(options.getCodeOutputDir()), TABLE_NAME + ".q");
    FileSystem fs = FileSystem.getLocal(new Configuration());

    // If it's already there, remove it before running the test to ensure
    // that it's the current test that generated the file.
    if (fs.exists(ddlFile)) {
        if (!fs.delete(ddlFile, false)) {
            LOG.warn("Could not delete previous ddl file: " + ddlFile);
        }
    }

    // Run a basic import, but specify that we're just generating definitions.
    String[] types = { "INTEGER" };
    String[] vals = { "42" };
    runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(), new CodeGenTool());

    // Test that the generated definition file exists.
    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));

    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()), TABLE_NAME);
    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
}

From source file: com.cloudera.sqoop.io.TestSplittableBufferedWriter.java

License: Apache License

/** Create the directory where we'll write our test files to; and
 * make sure it has no files in it.
 */
private void ensureEmptyWriteDir() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path writeDir = getWritePath();

    fs.mkdirs(writeDir);

    FileStatus[] stats = fs.listStatus(writeDir);

    for (FileStatus stat : stats) {
        if (stat.isDir()) {
            fail("setUp(): Write directory " + writeDir + " contains subdirectories");
        }

        LOG.debug("setUp(): Removing " + stat.getPath());
        if (!fs.delete(stat.getPath(), false)) {
            fail("setUp(): Could not delete residual file " + stat.getPath());
        }
    }

    if (!fs.exists(writeDir)) {
        fail("setUp: Could not create " + writeDir);
    }
}
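
Note that this test treats a false return from delete as a failure rather than expecting an exception: FileSystem.delete signals an unsuccessful deletion through its boolean result, so ignoring the return value can silently leave files behind.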

From source file: com.cloudera.sqoop.lib.TestBlobRef.java

License: Apache License

public void testExternalSubdir() throws IOException {
    final byte[] DATA = { 1, 2, 3, 4, 5 };
    final String FILENAME = "_lob/blobdata";

    try {
        doExternalTest(DATA, FILENAME);
    } finally {
        // remove dir we made.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        String tmpDir = System.getProperty("test.build.data", "/tmp/");
        Path lobDir = new Path(new Path(tmpDir), "_lob");
        fs.delete(lobDir, true);
    }
}

From source file: com.cloudera.sqoop.lib.TestBlobRef.java

License: Apache License

private void doExternalTest(final byte[] data, final String filename) throws IOException {

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");

    Path tmpPath = new Path(tmpDir);
    Path blobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path blobParent = blobFile.getParent();
    if (!fs.exists(blobParent)) {
        fs.mkdirs(blobParent);
    }

    LobFile.Writer lw = LobFile.create(blobFile, conf, false);
    try {
        long off = lw.tell();
        long len = data.length;
        OutputStream os = lw.writeBlobRecord(len);
        os.write(data, 0, data.length);
        os.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        BlobRef blob = BlobRef.parse(refString);
        assertTrue(blob.isExternal());
        assertEquals(refString, blob.toString());
        InputStream is = blob.getDataStream(conf, tmpPath);
        assertNotNull(is);

        byte[] buf = new byte[4096];
        int bytes = is.read(buf, 0, 4096);
        is.close();

        assertEquals(data.length, bytes);
        for (int i = 0; i < bytes; i++) {
            assertEquals(data[i], buf[i]);
        }
    } finally {
        fs.delete(blobFile, false);
    }
}
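
The finally block is the usual cleanup idiom for tests: the temporary blob file is removed even when an assertion fails. The non-recursive form delete(blobFile, false) suffices here because the target is a single file; the recursive flag only matters for directories. The TestClobRef example below follows the same structure.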

From source file: com.cloudera.sqoop.lib.TestClobRef.java

License: Apache License

public void testExternalSubdir() throws IOException {
    final String DATA = "This is the clob data!";
    final String FILENAME = "_lob/clobdata";

    try {
        doExternalTest(DATA, FILENAME);
    } finally {
        // remove dir we made.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        String tmpDir = System.getProperty("test.build.data", "/tmp/");
        Path lobDir = new Path(new Path(tmpDir), "_lob");
        fs.delete(lobDir, true);
    }
}

From source file: com.cloudera.sqoop.lib.TestClobRef.java

License: Apache License

private void doExternalTest(final String data, final String filename) throws IOException {

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");

    Path tmpPath = new Path(tmpDir);
    Path clobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path clobParent = clobFile.getParent();
    if (!fs.exists(clobParent)) {
        fs.mkdirs(clobParent);
    }

    LobFile.Writer lw = LobFile.create(clobFile, conf, true);
    try {
        long off = lw.tell();
        long len = data.length();
        Writer w = lw.writeClobRecord(len);
        w.append(data);
        w.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        ClobRef clob = ClobRef.parse(refString);
        assertTrue(clob.isExternal());
        assertEquals(refString, clob.toString());
        Reader r = clob.getDataStream(conf, tmpPath);
        assertNotNull(r);

        char[] buf = new char[4096];
        int chars = r.read(buf, 0, 4096);
        r.close();

        String str = new String(buf, 0, chars);
        assertEquals(data, str);
    } finally {
        fs.delete(clobFile, false);
    }
}

From source file: com.cloudera.sqoop.lib.TestLargeObjectLoader.java

License: Apache License

public void setUp() throws IOException, InterruptedException {
    conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    this.outDir = new Path(new Path(tmpDir), "testLobLoader");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    fs.mkdirs(outDir);

    loader = new LargeObjectLoader(conf, outDir);
}
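
A closing caveat that applies to all of the examples above: FileSystem.delete removes data immediately and bypasses the HDFS trash. Where recoverable deletion is preferred, one option is to try org.apache.hadoop.fs.Trash first. The following is a minimal sketch under that assumption (the path is illustrative); it falls back to a permanent delete only when trash is disabled.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class TrashDeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path target = new Path("/tmp/example-output"); // illustrative path
        // moveToAppropriateTrash returns true if the path was moved to trash;
        // it returns false when trash is disabled (fs.trash.interval = 0).
        if (!Trash.moveToAppropriateTrash(fs, target, conf)) {
            fs.delete(target, true); // permanent, recursive delete as fallback
        }
    }
}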