List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
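Before the examples from real projects below, here is a minimal standalone sketch of the call (the path here is illustrative, not taken from any of the sources that follow). delete returns true on success and false if the path does not exist; the recursive flag must be true to remove a non-empty directory, and deleting a non-empty directory with recursive=false throws an IOException. For a plain file the flag is irrelevant.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path target = new Path("/tmp/example-output"); // illustrative path
        // recursive = true deletes a directory and everything under it.
        if (fs.exists(target) && !fs.delete(target, true)) {
            System.err.println("Could not delete " + target);
        }
    }
}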
From source file:com.cloudera.recordservice.examples.mapreduce.WordCount.java
License:Apache License
public void run(String[] args) throws Exception {
    boolean useRecordService = true;
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    } else if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }
    String input = args[0].trim();
    String output = args[1];

    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    if (useRecordService) {
        conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
        RecordServiceConfig.setInput(conf, input);
    } else {
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(input));
    }

    // Remove any previous output: MapReduce jobs fail if the output path
    // already exists.
    FileSystem fs = FileSystem.get(conf);
    Path outputPath = new Path(output);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }

    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    System.out.println("Done");
}
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        return;
    }
    for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) {
        for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) {
            for (FileStatus sampleGroupStatus : fs.listStatus(posStatus.getPath(),
                    new PartitionPathFilter("sample_group", sampleGroup))) {
                fs.delete(sampleGroupStatus.getPath(), true);
            }
        }
    }
}
From source file:com.cloudera.sparkwordcount.ipWordCount.java
License:Apache License
public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().set("spark.dynamicAllocation.initialExecutors", "5").setAppName("Spark Count"));
    // sc.addJar("");
    // final Logger logger = Logger.getLogger("org");
    // logger.setLevel(Level.INFO);
    final int threshold = Integer.parseInt(args[1]);
    JavaRDD<String> stringJavaRDD = sc.textFile(args[0]);
    JavaRDD<String> filteredRDD = stringJavaRDD.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String value) throws Exception {
            if (value.contains("TIME_STAMP")) {
                return false;
            }
            RdrRaw line = RdrParser.parseRdr(value);
            if (line == null) {
                System.out.println("can't parse rdr");
                return false;
            }
            String url = line.dstHost;
            if (url.trim().isEmpty()) {
                return false;
            }
            //System.out.println(url);
            return true;
        }
    });
    JavaPairRDD<RdrRaw, Integer> countsIp = filteredRDD.mapToPair(new PairFunction<String, RdrRaw, Integer>() {
        @Override
        public Tuple2<RdrRaw, Integer> call(String s) throws Exception {
            RdrRaw rdrRaw = RdrParser.parseRdr(s);
            return new Tuple2<RdrRaw, Integer>(rdrRaw, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
            return i1 + i2;
        }
    });
    // filter out words with less than threshold occurrences
    JavaPairRDD<RdrRaw, Integer> filtered = countsIp.filter(new Function<Tuple2<RdrRaw, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<RdrRaw, Integer> rdrRawIntegerTuple2) throws Exception {
            return rdrRawIntegerTuple2._2() > threshold;
        }
    });
    JavaPairRDD<Integer, RdrRaw> finalPair = filtered
            .mapToPair(new PairFunction<Tuple2<RdrRaw, Integer>, Integer, RdrRaw>() {
                @Override
                public Tuple2<Integer, RdrRaw> call(Tuple2<RdrRaw, Integer> item) throws Exception {
                    return item.swap();
                }
            }).sortByKey(false);

    List<Tuple2<Integer, RdrRaw>> collect = finalPair.take(10);
    StringBuilder msgBody = new StringBuilder();
    for (Tuple2<Integer, RdrRaw> rdrInTuple2 : collect) {
        RdrRaw rdrRaw = rdrInTuple2._2();
        Integer count = rdrInTuple2._1();
        msgBody.append(rdrRaw.dstHost)
                // .append(rdrRaw.dstParam)
                .append(" found [").append(count).append("]\n");
    }

    Configuration conf = new Configuration();
    try {
        // Overwrite any previous summary file.
        Path p = new Path(args[2]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        FSDataOutputStream out = fs.create(p);
        ByteArrayInputStream in = new ByteArrayInputStream(msgBody.toString().getBytes());
        byte[] buffer = new byte[256];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }
        in.close();
        out.close();

        // Remove the old full dump first; saveAsTextFile refuses to write
        // to a directory that already exists.
        p = new Path(args[2] + "_all");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        finalPair.saveAsTextFile(args[2] + "_all");
    } catch (IOException e) {
        e.printStackTrace();
    }
    sc.stop();
    /*
    Properties props = new Properties();
    props.put("mail.smtps.host", "smtp.gmail.com");
    props.put("mail.smtps.auth", "true");
    Session session = Session.getDefaultInstance(props, null);
    System.out.println("try send email");
    try {
        Message msg = new MimeMessage(session);
        msg.setFrom(new InternetAddress("spark@hadoop.com", "Spark Generated Message"));
        msg.addRecipient(Message.RecipientType.TO,
                new InternetAddress("fesswoodwork@gmail.com", "Spark Responder"));
        msg.setSubject("Spark task finished");
        msg.setText(msgBody.toString());
        SMTPTransport t = (SMTPTransport) session.getTransport("smtps");
        t.connect("smtp.gmail.com", "fesswoodwork", "9610792adc");
        t.sendMessage(msg, msg.getAllRecipients());
        Transport.send(msg);
    } catch (AddressException e) {
        e.printStackTrace();
        System.out.println("AddressException " + e.getMessage());
    } catch (MessagingException e) {
        e.printStackTrace();
        System.out.println("MessagingException " + e.getMessage());
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        System.out.println("UnsupportedEncodingException " + e.getMessage());
    }
    System.out.println("sending successfully ends");
    */
    /*
    // split each document into words
    JavaRDD<String> tokenized = stringJavaRDD.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(s.split(" "));
        }
    });
    // count the occurrence of each word
    JavaPairRDD<String, Integer> counts = tokenized.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    // filter out words with less than threshold occurrences
    JavaPairRDD<String, Integer> filtered = counts.filter(new Function<Tuple2<String, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<String, Integer> tup) {
            return tup._2() >= threshold;
        }
    });
    // count characters
    JavaPairRDD<Character, Integer> charCounts = filtered.flatMap(
            new FlatMapFunction<Tuple2<String, Integer>, Character>() {
                @Override
                public Iterable<Character> call(Tuple2<String, Integer> s) {
                    Collection<Character> chars = new ArrayList<Character>(s._1().length());
                    for (char c : s._1().toCharArray()) {
                        chars.add(c);
                    }
                    return chars;
                }
            }).mapToPair(new PairFunction<Character, Character, Integer>() {
                @Override
                public Tuple2<Character, Integer> call(Character c) {
                    return new Tuple2<Character, Integer>(c, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });
    System.out.println(charCounts.collect());
    */
}
From source file:com.cloudera.sqoop.hive.TestHiveImport.java
License:Apache License
/** Test that we can generate a file containing the DDL and not import. */
@Test
public void testGenerateOnly() throws IOException {
    final String TABLE_NAME = "GenerateOnly";
    setCurTableName(TABLE_NAME);
    setNumCols(1);

    // Figure out where our target generated .q file is going to be.
    SqoopOptions options = getSqoopOptions(getArgv(false, null), new ImportTool());
    Path ddlFile = new Path(new Path(options.getCodeOutputDir()), TABLE_NAME + ".q");
    FileSystem fs = FileSystem.getLocal(new Configuration());

    // If it's already there, remove it before running the test to ensure
    // that it's the current test that generated the file.
    if (fs.exists(ddlFile)) {
        if (!fs.delete(ddlFile, false)) {
            LOG.warn("Could not delete previous ddl file: " + ddlFile);
        }
    }

    // Run a basic import, but specify that we're just generating definitions.
    String[] types = { "INTEGER" };
    String[] vals = { "42" };
    runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(), new CodeGenTool());

    // Test that the generated definition file exists.
    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));

    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()), TABLE_NAME);
    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
}
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Create the directory where we'll write our test files to, and
 * make sure it has no files in it.
 */
private void ensureEmptyWriteDir() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path writeDir = getWritePath();
    fs.mkdirs(writeDir);

    FileStatus[] stats = fs.listStatus(writeDir);
    for (FileStatus stat : stats) {
        if (stat.isDir()) {
            fail("setUp(): Write directory " + writeDir + " contains subdirectories");
        }
        LOG.debug("setUp(): Removing " + stat.getPath());
        if (!fs.delete(stat.getPath(), false)) {
            fail("setUp(): Could not delete residual file " + stat.getPath());
        }
    }

    if (!fs.exists(writeDir)) {
        fail("setUp: Could not create " + writeDir);
    }
}
From source file:com.cloudera.sqoop.lib.TestBlobRef.java
License:Apache License
public void testExternalSubdir() throws IOException {
    final byte[] DATA = { 1, 2, 3, 4, 5 };
    final String FILENAME = "_lob/blobdata";
    try {
        doExternalTest(DATA, FILENAME);
    } finally {
        // remove dir we made.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        String tmpDir = System.getProperty("test.build.data", "/tmp/");
        Path lobDir = new Path(new Path(tmpDir), "_lob");
        fs.delete(lobDir, true);
    }
}
From source file:com.cloudera.sqoop.lib.TestBlobRef.java
License:Apache License
private void doExternalTest(final byte[] data, final String filename) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    Path tmpPath = new Path(tmpDir);
    Path blobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path blobParent = blobFile.getParent();
    if (!fs.exists(blobParent)) {
        fs.mkdirs(blobParent);
    }

    LobFile.Writer lw = LobFile.create(blobFile, conf, false);
    try {
        long off = lw.tell();
        long len = data.length;
        OutputStream os = lw.writeBlobRecord(len);
        os.write(data, 0, data.length);
        os.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        BlobRef blob = BlobRef.parse(refString);
        assertTrue(blob.isExternal());
        assertEquals(refString, blob.toString());

        InputStream is = blob.getDataStream(conf, tmpPath);
        assertNotNull(is);
        byte[] buf = new byte[4096];
        int bytes = is.read(buf, 0, 4096);
        is.close();

        assertEquals(data.length, bytes);
        for (int i = 0; i < bytes; i++) {
            assertEquals(data[i], buf[i]);
        }
    } finally {
        fs.delete(blobFile, false);
    }
}
From source file:com.cloudera.sqoop.lib.TestClobRef.java
License:Apache License
public void testExternalSubdir() throws IOException {
    final String DATA = "This is the clob data!";
    final String FILENAME = "_lob/clobdata";
    try {
        doExternalTest(DATA, FILENAME);
    } finally {
        // remove dir we made.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        String tmpDir = System.getProperty("test.build.data", "/tmp/");
        Path lobDir = new Path(new Path(tmpDir), "_lob");
        fs.delete(lobDir, true);
    }
}
From source file:com.cloudera.sqoop.lib.TestClobRef.java
License:Apache License
private void doExternalTest(final String data, final String filename) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    Path tmpPath = new Path(tmpDir);
    Path clobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path clobParent = clobFile.getParent();
    if (!fs.exists(clobParent)) {
        fs.mkdirs(clobParent);
    }

    LobFile.Writer lw = LobFile.create(clobFile, conf, true);
    try {
        long off = lw.tell();
        long len = data.length();
        Writer w = lw.writeClobRecord(len);
        w.append(data);
        w.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        ClobRef clob = ClobRef.parse(refString);
        assertTrue(clob.isExternal());
        assertEquals(refString, clob.toString());

        Reader r = clob.getDataStream(conf, tmpPath);
        assertNotNull(r);
        char[] buf = new char[4096];
        int chars = r.read(buf, 0, 4096);
        r.close();

        String str = new String(buf, 0, chars);
        assertEquals(data, str);
    } finally {
        fs.delete(clobFile, false);
    }
}
From source file:com.cloudera.sqoop.lib.TestLargeObjectLoader.java
License:Apache License
public void setUp() throws IOException, InterruptedException {
    conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    this.outDir = new Path(new Path(tmpDir), "testLobLoader");

    // Start each test from an empty output directory.
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    fs.mkdirs(outDir);

    loader = new LargeObjectLoader(conf, outDir);
}