List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
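Before the examples from real projects below, here is a minimal standalone sketch of the call (the path here is illustrative, not taken from any of the sources that follow). delete returns true on success and false if the path does not exist; the recursive flag must be true to remove a non-empty directory, and deleting a non-empty directory with recursive=false throws an IOException. For a plain file the flag is irrelevant.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path target = new Path("/tmp/example-output"); // illustrative path
        // recursive = true deletes a directory and everything under it.
        if (fs.exists(target) && !fs.delete(target, true)) {
            System.err.println("Could not delete " + target);
        }
    }
}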
From source file:com.cloudera.recordservice.examples.mapreduce.WordCount.java
License:Apache License
public void run(String[] args) throws Exception {
    boolean useRecordService = true;
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    } else if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }
    String input = args[0].trim();
    String output = args[1];

    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    if (useRecordService) {
        conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
        RecordServiceConfig.setInput(conf, input);
    } else {
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(input));
    }

    // Remove any previous output: MapReduce jobs fail if the output path
    // already exists.
    FileSystem fs = FileSystem.get(conf);
    Path outputPath = new Path(output);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }

    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    System.out.println("Done");
}
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        return;
    }
    for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) {
        for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) {
            for (FileStatus sampleGroupStatus : fs.listStatus(posStatus.getPath(),
                    new PartitionPathFilter("sample_group", sampleGroup))) {
                fs.delete(sampleGroupStatus.getPath(), true);
            }
        }
    }
}
From source file:com.cloudera.sparkwordcount.ipWordCount.java
License:Apache License
public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().set("spark.dynamicAllocation.initialExecutors", "5").setAppName("Spark Count"));
    // sc.addJar("");
    // final Logger logger = Logger.getLogger("org");
    // logger.setLevel(Level.INFO);
    final int threshold = Integer.parseInt(args[1]);
    JavaRDD<String> stringJavaRDD = sc.textFile(args[0]);
    JavaRDD<String> filteredRDD = stringJavaRDD.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String value) throws Exception {
            if (value.contains("TIME_STAMP")) {
                return false;
            }
            RdrRaw line = RdrParser.parseRdr(value);
            if (line == null) {
                System.out.println("can't parse rdr");
                return false;
            }
            String url = line.dstHost;
            if (url.trim().isEmpty()) {
                return false;
            }
            //System.out.println(url);
            return true;
        }
    });
    JavaPairRDD<RdrRaw, Integer> countsIp = filteredRDD.mapToPair(new PairFunction<String, RdrRaw, Integer>() {
        @Override
        public Tuple2<RdrRaw, Integer> call(String s) throws Exception {
            RdrRaw rdrRaw = RdrParser.parseRdr(s);
            return new Tuple2<RdrRaw, Integer>(rdrRaw, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
            return i1 + i2;
        }
    });
    // filter out words with less than threshold occurrences
    JavaPairRDD<RdrRaw, Integer> filtered = countsIp.filter(new Function<Tuple2<RdrRaw, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<RdrRaw, Integer> rdrRawIntegerTuple2) throws Exception {
            return rdrRawIntegerTuple2._2() > threshold;
        }
    });
    JavaPairRDD<Integer, RdrRaw> finalPair = filtered
            .mapToPair(new PairFunction<Tuple2<RdrRaw, Integer>, Integer, RdrRaw>() {
                @Override
                public Tuple2<Integer, RdrRaw> call(Tuple2<RdrRaw, Integer> item) throws Exception {
                    return item.swap();
                }
            }).sortByKey(false);

    List<Tuple2<Integer, RdrRaw>> collect = finalPair.take(10);
    StringBuilder msgBody = new StringBuilder();
    for (Tuple2<Integer, RdrRaw> rdrInTuple2 : collect) {
        RdrRaw rdrRaw = rdrInTuple2._2();
        Integer count = rdrInTuple2._1();
        msgBody.append(rdrRaw.dstHost)
                // .append(rdrRaw.dstParam)
                .append(" found [").append(count).append("]\n");
    }

    Configuration conf = new Configuration();
    try {
        // Overwrite any previous summary file.
        Path p = new Path(args[2]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        FSDataOutputStream out = fs.create(p);
        ByteArrayInputStream in = new ByteArrayInputStream(msgBody.toString().getBytes());
        byte[] buffer = new byte[256];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }
        in.close();
        out.close();

        // Remove the old full dump first; saveAsTextFile refuses to write
        // to a directory that already exists.
        p = new Path(args[2] + "_all");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        finalPair.saveAsTextFile(args[2] + "_all");
    } catch (IOException e) {
        e.printStackTrace();
    }
    sc.stop();
    /*
    Properties props = new Properties();
    props.put("mail.smtps.host", "smtp.gmail.com");
    props.put("mail.smtps.auth", "true");
    Session session = Session.getDefaultInstance(props, null);
    System.out.println("try send email");
    try {
        Message msg = new MimeMessage(session);
        msg.setFrom(new InternetAddress("spark@hadoop.com", "Spark Generated Message"));
        msg.addRecipient(Message.RecipientType.TO,
                new InternetAddress("fesswoodwork@gmail.com", "Spark Responder"));
        msg.setSubject("Spark task finished");
        msg.setText(msgBody.toString());
        SMTPTransport t = (SMTPTransport) session.getTransport("smtps");
        t.connect("smtp.gmail.com", "fesswoodwork", "9610792adc");
        t.sendMessage(msg, msg.getAllRecipients());
        Transport.send(msg);
    } catch (AddressException e) {
        e.printStackTrace();
        System.out.println("AddressException " + e.getMessage());
    } catch (MessagingException e) {
        e.printStackTrace();
        System.out.println("MessagingException " + e.getMessage());
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        System.out.println("UnsupportedEncodingException " + e.getMessage());
    }
    System.out.println("sending successfully ends");
    */
    /*
    // split each document into words
    JavaRDD<String> tokenized = stringJavaRDD.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(s.split(" "));
        }
    });
    // count the occurrence of each word
    JavaPairRDD<String, Integer> counts = tokenized.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    // filter out words with less than threshold occurrences
    JavaPairRDD<String, Integer> filtered = counts.filter(new Function<Tuple2<String, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<String, Integer> tup) {
            return tup._2() >= threshold;
        }
    });
    // count characters
    JavaPairRDD<Character, Integer> charCounts = filtered.flatMap(
            new FlatMapFunction<Tuple2<String, Integer>, Character>() {
                @Override
                public Iterable<Character> call(Tuple2<String, Integer> s) {
                    Collection<Character> chars = new ArrayList<Character>(s._1().length());
                    for (char c : s._1().toCharArray()) {
                        chars.add(c);
                    }
                    return chars;
                }
            }).mapToPair(new PairFunction<Character, Character, Integer>() {
                @Override
                public Tuple2<Character, Integer> call(Character c) {
                    return new Tuple2<Character, Integer>(c, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });
    System.out.println(charCounts.collect());
    */
}
From source file:com.cloudera.sqoop.hive.TestHiveImport.java
License:Apache License
/** Test that we can generate a file containing the DDL and not import. */
@Test
public void testGenerateOnly() throws IOException {
    final String TABLE_NAME = "GenerateOnly";
    setCurTableName(TABLE_NAME);
    setNumCols(1);

    // Figure out where our target generated .q file is going to be.
    SqoopOptions options = getSqoopOptions(getArgv(false, null), new ImportTool());
    Path ddlFile = new Path(new Path(options.getCodeOutputDir()), TABLE_NAME + ".q");
    FileSystem fs = FileSystem.getLocal(new Configuration());

    // If it's already there, remove it before running the test to ensure
    // that it's the current test that generated the file.
    if (fs.exists(ddlFile)) {
        if (!fs.delete(ddlFile, false)) {
            LOG.warn("Could not delete previous ddl file: " + ddlFile);
        }
    }

    // Run a basic import, but specify that we're just generating definitions.
    String[] types = { "INTEGER" };
    String[] vals = { "42" };
    runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(), new CodeGenTool());

    // Test that the generated definition file exists.
    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));

    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()), TABLE_NAME);
    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
}
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Create the directory where we'll write our test files to, and
 * make sure it has no files in it.
 */
private void ensureEmptyWriteDir() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path writeDir = getWritePath();
    fs.mkdirs(writeDir);

    FileStatus[] stats = fs.listStatus(writeDir);
    for (FileStatus stat : stats) {
        if (stat.isDir()) {
            fail("setUp(): Write directory " + writeDir + " contains subdirectories");
        }
        LOG.debug("setUp(): Removing " + stat.getPath());
        if (!fs.delete(stat.getPath(), false)) {
            fail("setUp(): Could not delete residual file " + stat.getPath());
        }
    }

    if (!fs.exists(writeDir)) {
        fail("setUp: Could not create " + writeDir);
    }
}
From source file:com.cloudera.sqoop.lib.TestBlobRef.java
License:Apache License
public void testExternalSubdir() throws IOException {
    final byte[] DATA = { 1, 2, 3, 4, 5 };
    final String FILENAME = "_lob/blobdata";
    try {
        doExternalTest(DATA, FILENAME);
    } finally {
        // remove dir we made.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        String tmpDir = System.getProperty("test.build.data", "/tmp/");
        Path lobDir = new Path(new Path(tmpDir), "_lob");
        fs.delete(lobDir, true);
    }
}
From source file:com.cloudera.sqoop.lib.TestBlobRef.java
License:Apache License
private void doExternalTest(final byte[] data, final String filename) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    Path tmpPath = new Path(tmpDir);
    Path blobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path blobParent = blobFile.getParent();
    if (!fs.exists(blobParent)) {
        fs.mkdirs(blobParent);
    }

    LobFile.Writer lw = LobFile.create(blobFile, conf, false);
    try {
        long off = lw.tell();
        long len = data.length;
        OutputStream os = lw.writeBlobRecord(len);
        os.write(data, 0, data.length);
        os.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        BlobRef blob = BlobRef.parse(refString);
        assertTrue(blob.isExternal());
        assertEquals(refString, blob.toString());

        InputStream is = blob.getDataStream(conf, tmpPath);
        assertNotNull(is);
        byte[] buf = new byte[4096];
        int bytes = is.read(buf, 0, 4096);
        is.close();

        assertEquals(data.length, bytes);
        for (int i = 0; i < bytes; i++) {
            assertEquals(data[i], buf[i]);
        }
    } finally {
        fs.delete(blobFile, false);
    }
}
From source file:com.cloudera.sqoop.lib.TestClobRef.java
License:Apache License
public void testExternalSubdir() throws IOException {
    final String DATA = "This is the clob data!";
    final String FILENAME = "_lob/clobdata";
    try {
        doExternalTest(DATA, FILENAME);
    } finally {
        // remove dir we made.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        String tmpDir = System.getProperty("test.build.data", "/tmp/");
        Path lobDir = new Path(new Path(tmpDir), "_lob");
        fs.delete(lobDir, true);
    }
}
From source file:com.cloudera.sqoop.lib.TestClobRef.java
License:Apache License
private void doExternalTest(final String data, final String filename) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    Path tmpPath = new Path(tmpDir);
    Path clobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path clobParent = clobFile.getParent();
    if (!fs.exists(clobParent)) {
        fs.mkdirs(clobParent);
    }

    LobFile.Writer lw = LobFile.create(clobFile, conf, true);
    try {
        long off = lw.tell();
        long len = data.length();
        Writer w = lw.writeClobRecord(len);
        w.append(data);
        w.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        ClobRef clob = ClobRef.parse(refString);
        assertTrue(clob.isExternal());
        assertEquals(refString, clob.toString());

        Reader r = clob.getDataStream(conf, tmpPath);
        assertNotNull(r);
        char[] buf = new char[4096];
        int chars = r.read(buf, 0, 4096);
        r.close();

        String str = new String(buf, 0, chars);
        assertEquals(data, str);
    } finally {
        fs.delete(clobFile, false);
    }
}
From source file:com.cloudera.sqoop.lib.TestLargeObjectLoader.java
License:Apache License
public void setUp() throws IOException, InterruptedException {
    conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    this.outDir = new Path(new Path(tmpDir), "testLobLoader");

    // Start each test from an empty output directory.
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    fs.mkdirs(outDir);

    loader = new LargeObjectLoader(conf, outDir);
}