List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
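Before the project snippets below, here is a minimal sketch of the pattern they all share: obtain a FileSystem handle, use it, and call close() when finished. This sketch is illustrative and not taken from any of the listed projects; the path used is hypothetical. Since FileSystem implements java.io.Closeable, try-with-resources can manage the handle. Note that FileSystem.get() normally returns a cached, shared instance, so closing it also closes the handle for other callers that received the same cached object.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // try-with-resources calls fs.close() even if an I/O error occurs.
        try (FileSystem fs = FileSystem.get(conf)) {
            // The path below is purely illustrative.
            System.out.println(fs.exists(new Path("/tmp/example.txt")));
        }
        // The handle is closed here and its client resources are released.
    }
}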
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriter.java
License:Apache License
private void testSequenceFile(boolean useUUIDAsKey) throws Exception {
    String keyEL = (useUUIDAsKey) ? "${uuid()}" : "${record:value('/')}";
    FileSystem fs = getRawLocalFileSystem();
    try {
        Path file = new Path(getTestDir(), "file.txt");
        SequenceFile.Writer seqFile = SequenceFile.createWriter(fs, new HdfsConfiguration(), file, Text.class,
                Text.class, SequenceFile.CompressionType.NONE, (CompressionCodec) null);
        long timeToLive = 10000;
        long expires = System.currentTimeMillis() + timeToLive;
        RecordWriter writer = new RecordWriter(file, timeToLive, seqFile, keyEL,
                new DummyDataGeneratorFactory(null), ContextInfoCreator.createTargetContext(HdfsDTarget.class,
                        "testWritersLifecycle", false, OnRecordError.TO_ERROR, null));
        Assert.assertFalse(writer.isTextFile());
        Assert.assertTrue(writer.isSeqFile());
        Assert.assertEquals(file, writer.getPath());
        Assert.assertTrue(expires <= writer.getExpiresOn());
        Assert.assertTrue(writer.toString().contains(file.toString()));
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        writer.write(record);
        record.set(Field.create("z"));
        writer.write(record);
        Assert.assertFalse(writer.isClosed());
        writer.flush();
        Assert.assertTrue(writer.getLength() > 4);
        Assert.assertEquals(2, writer.getRecords());
        writer.close();
        Assert.assertTrue(writer.isClosed());
        try {
            writer.write(record);
            Assert.fail();
        } catch (IOException ex) {
            //NOP
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, new HdfsConfiguration());
        Text key = new Text();
        Text value = new Text();
        Assert.assertTrue(reader.next(key, value));
        if (useUUIDAsKey) {
            Assert.assertNotNull(UUID.fromString(key.toString()));
        } else {
            Assert.assertEquals("a", key.toString());
        }
        Assert.assertEquals("a", value.toString().trim());
        Assert.assertTrue(reader.next(key, value));
        if (useUUIDAsKey) {
            Assert.assertNotNull(UUID.fromString(key.toString()));
        } else {
            Assert.assertEquals("z", key.toString());
        }
        Assert.assertEquals("z", value.toString().trim());
        Assert.assertFalse(reader.next(key, value));
        reader.close();
    } finally {
        fs.close();
    }
}
From source file:com.thinkbiganalytics.nifi.v2.hdfs.AbstractHadoopProcessor.java
License:Apache License
@OnStopped
public final void abstractOnStopped() {
    HdfsResources hdfs = hdfsResources.get();
    if (hdfs != null) {
        FileSystem fs = hdfsResources.get().getFileSystem();
        if (fs != null) {
            try {
                getLog().info("Processor Stop in progress. Will release HDFS resources.");
                fs.close();
            } catch (IOException e) {
                getLog().error("Received IOException when attempting to close HDFS FileSystem handle");
            }
        }
    }
    hdfsResources.set(new HdfsResources(null, null, null));
}
From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java
public static hdfsFile transferToHDFS(file f) throws IOException {
    if (f.getClass().equals(hdfsFile.class)) // if the file to be put on HDFS is already on HDFS
        return (hdfsFile) f; // just return the file
    FileSystem fs = FileSystem.get(GetConfiguration.get());
    Path theFile;
    do {
        theFile = new Path(settings.hdfs_prefix + "/LOCAL_TABLE_" + System.currentTimeMillis() + "_"
                + Math.round(Math.random() * 10000));
    } while (fs.exists(theFile));
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(theFile)));
    f.resetStream();
    while (f.hasNext()) {
        out.write(f.readNextLine() + "\n");
    }
    out.close();
    fs.close();
    return new hdfsFile(theFile);
}
From source file:com.toddbodnar.simpleHive.IO.hdfsFileTest.java
@BeforeClass
public static void setUp() {
    try {
        Path testFile = new Path("hdfs://localhost:8020///" + settings.hdfs_prefix + "/hdfsFileTest.csv");
        FileSystem fs = FileSystem.get(GetConfiguration.get());
        if (fs.exists(testFile)) {
            fs.delete(testFile, true);
        }
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(testFile)));
        out.write("hello\nworld!\n");
        out.close();
        fs.close();
    } catch (Exception ex) {
        System.err.println("Could not create file on HDFS: " + ex);
        nohdfs = true;
    }
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
/**
 * Tests to see if an HDFS cluster is currently reachable.
 *
 * @param hdfsNamenode the namenode to test for reachability
 * @return {@code true} if the name node is reachable.
 */
public static boolean reachable(String hdfsNamenode) {
    Configuration config = new Configuration();
    config.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, hdfsNamenode);
    FileSystem fs;
    try {
        fs = FileSystem.get(config);
        if (fs != null) {
            fs.close();
            return true;
        }
        return false;
    } catch (IOException e) {
        return false;
    }
}
From source file:com.verizon.Main.java
public static void main(String[] args) throws Exception {
    String warehouseLocation = "file:" + System.getProperty("user.dir") + "spark-warehouse";

    SparkSession spark = SparkSession.builder().appName("Verizon").config("spark.master", "local[2]")
            .config("spark.sql.warehouse.dir", warehouseLocation).enableHiveSupport().getOrCreate();

    Configuration configuration = new Configuration();
    configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/core-site.xml"));
    configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/hdfs-site.xml"));
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);

    SQLContext context = new SQLContext(spark);
    String schemaString = " Device,Title,ReviewText,SubmissionTime,UserNickname";
    //spark.read().textFile(schemaString)

    Dataset<Row> df = spark.read().csv("hdfs://localhost:9000/data.csv");
    //df.show();
    //df.printSchema();
    df = df.select("_c2");

    Path file = new Path("hdfs://localhost:9000/tempFile.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    df.write().csv("hdfs://localhost:9000/tempFile.txt");

    JavaRDD<String> lines = spark.read().textFile("hdfs://localhost:9000/tempFile.txt").javaRDD();
    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String s) {
            return Arrays.asList(SPACE.split(s)).iterator();
        }
    });
    JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            s = s.replaceAll("[^a-zA-Z0-9]+", "");
            s = s.toLowerCase().trim();
            return new Tuple2<>(s, 1);
        }
    });
    JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    JavaPairRDD<Integer, String> frequencies = counts
            .mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(Tuple2<String, Integer> s) {
                    return new Tuple2<Integer, String>(s._2, s._1);
                }
            });
    frequencies = frequencies.sortByKey(false);
    JavaPairRDD<String, Integer> result = frequencies
            .mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(Tuple2<Integer, String> s) throws Exception {
                    return new Tuple2<String, Integer>(s._2, s._1);
                }
            });
    //JavaPairRDD<Integer,String> sortedByFreq = sort(frequencies, "descending");

    file = new Path("hdfs://localhost:9000/allresult.csv");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    //FileUtils.deleteDirectory(new File("allresult.csv"));
    result.saveAsTextFile("hdfs://localhost:9000/allresult.csv");

    List<Tuple2<String, Integer>> output = result.take(250);
    ExportToHive hiveExport = new ExportToHive();
    String rows = "";
    for (Tuple2<String, Integer> tuple : output) {
        String date = new Date().toString();
        String keyword = tuple._1();
        Integer count = tuple._2();
        //System.out.println(keyword + "," + count);
        rows += date + "," + "Samsung Galaxy s7," + keyword + "," + count + System.lineSeparator();
    }
    //System.out.println(rows);
    /*
    file = new Path("hdfs://localhost:9000/result.csv");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    br.write(rows);
    br.close();
    */
    hdfs.close();

    FileUtils.deleteQuietly(new File("result.csv"));
    FileUtils.writeStringToFile(new File("result.csv"), rows);
    hiveExport.writeToHive(spark);
    ExportDataToServer exportServer = new ExportDataToServer();
    exportServer.sendDataToRESTService(rows);
    spark.stop();
}
From source file:com.yahoo.semsearch.fastlinking.io.Datapack.java
License:Apache License
private void merge(String anchorMapPath, String dfMapPath, String multiple_out, String out, String ngram)
        throws IOException {

    JobConf conf = new JobConf(getConf(), Datapack.class);
    FileSystem fs = FileSystem.get(conf);

    BufferedWriter anchorsDataOut;
    BufferedWriter anchorsTSVOut;

    Boolean multiple_output = (multiple_out != null && multiple_out.equalsIgnoreCase("true"));
    Boolean ngram_output = (ngram != null && ngram.equalsIgnoreCase("true"));

    if (!multiple_output) {
        anchorsDataOut = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(out), outputEncoding));
        anchorsTSVOut = null;
    } else {
        anchorsDataOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".dat"), outputEncoding));
        anchorsTSVOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".tsv"), outputEncoding));
    }

    // Loop over anchors
    MapFile.Reader anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
    MapFile.Reader dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

    /*FileStatus[] status = fs.listStatus(new Path(dfMapPath)); // you need to pass in your hdfs path
    for (FileStatus fileStatus : status) {
        if (!fileStatus.getPath().toString().contains("part-"))
            continue;
        MapFile.Reader dfMapReader = new MapFile.Reader(fileStatus.getPath(), conf);
    */

    Text akey = new Text();
    Text dkey = new Text();
    IntWritable df = new IntWritable();
    HMapSIW map = new HMapSIW();

    while (anchorMapReader.next(akey, map)) {

        // since they are both sorted we can just iterate over both
        // TODO if need be, artificially add a 0 count to unseen anchors
        dfMapReader.next(dkey, df);
        while (!akey.toString().equalsIgnoreCase(dkey.toString())) {
            //System.err.println("Mismatch: '" + akey + "' and '" + dkey + "'");
            anchorMapReader.next(akey, map);
        }
        String l = akey.toString();

        // while (dfMapReader.next(dkey, df)) {
        //     String l = dkey.toString();

        if (l.trim().length() < 2)
            continue;

        StringBuilder targets = new StringBuilder();
        int total = 0;
        for (String target : map.keySet()) {
            int count = map.get(target);
            total += count;
            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
            targets.append(entity);
            targets.append(SEPARATOR);
            targets.append(Integer.toString(count));
            targets.append("\t");
        }

        if (StringUtils.isNumeric(l) && total < 2)
            continue;

        //System.err.println("targets " + targets);
        if (targets.length() < 2)
            continue;

        if (!ngram_output) {
            anchorsDataOut.write(l);
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(df.get()));
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(total));
            anchorsDataOut.write("\t");
            anchorsDataOut.write(targets.substring(0, targets.length() - 1));
            anchorsDataOut.write("\n");
            anchorsDataOut.flush();

            if (multiple_output) {
                for (String target : map.keySet()) {
                    int count = map.get(target);
                    String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                    anchorsTSVOut.write(l);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(df.get()));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(total));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(entity);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(count));
                    anchorsTSVOut.write("\n");
                    anchorsTSVOut.flush();
                }
            }
        } else {
            String parts[] = l.split("\\s+");
            for (int i = 0; i < parts.length; i++) {
                StringBuilder sb = new StringBuilder();
                for (int j = i; j < parts.length; j++) {
                    sb.append(parts[j]);
                    String ss = sb.toString();

                    anchorsDataOut.write(ss);
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(df.get()));
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(total));
                    anchorsDataOut.write("\t");
                    anchorsDataOut.write(targets.substring(0, targets.length() - 1));
                    anchorsDataOut.write("\n");
                    anchorsDataOut.flush();

                    if (multiple_output) {
                        for (String target : map.keySet()) {
                            int count = map.get(target);
                            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                            anchorsTSVOut.write(ss);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(df.get()));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(total));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(entity);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(count));
                            anchorsTSVOut.write("\n");
                            anchorsTSVOut.flush();
                        }
                        sb.append(" ");
                    }
                }
            }
        }
    }

    dfMapReader.close();
    //}

    anchorsDataOut.close();
    if (multiple_output) {
        anchorsTSVOut.close();
    }

    //anchorMapReader.close();

    fs.close();
}
From source file:com.yahoo.semsearch.fastlinking.io.ExtractWikipediaAnchorText.java
License:Apache License
private void merge(String anchorMapPath, String dfMapPath) throws IOException {
    LOG.info("Extracting anchor text (merge)...");
    LOG.info(" - input: " + anchorMapPath);
    LOG.info(" - output: " + dfMapPath);

    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    FileSystem fs = FileSystem.get(conf);

    // Loop over anchors
    MapFile.Reader anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
    MapFile.Reader dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

    // IntWritable key = new IntWritable(Integer.parseInt(cmdline.getArgs()[0]));
    // System.out.println(key.toString());

    Text key = new Text();
    IntWritable df = new IntWritable();

    while (dfMapReader.next(key, df)) {
        //if (!key.toString().equalsIgnoreCase("Jim Durham"))
        //    continue;
        HMapSIW map = new HMapSIW();
        anchorMapReader.get(key, map);

        System.out.println(key + "\t" + df + "\t" + map.toString());
        // for (String entity : map.keySet()) {
        //     System.out.println("\t" + entity + "\t" + map.get(entity) + "\n");
        // }
        break;
    }

    anchorMapReader.close();
    dfMapReader.close();
    fs.close();
}
From source file:dataload.HDFSReaderExternal.java
License:Apache License
public ArrayList<String> readFile(String fileName) throws FileNotFoundException {
    ArrayList<String> output = new ArrayList<String>();
    try {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(java.net.URI.create(fileName), conf);
        FSDataInputStream in = hdfs.open(new Path(fileName));
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String strLine;
        while ((strLine = br.readLine()) != null) {
            System.out.println(strLine);
            output.add(strLine);
        }
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    }
    return output;
}
From source file:eagle.dataproc.impl.storm.hdfs.DataCollectionHDFSSpout.java
License:Apache License
public void copyFiles() {
    LOG.info("Inside listFiles()");
    Configuration conf = new Configuration();

    // _____________ TO TEST THAT CORRECT HADOOP JARs ARE INCLUDED __________________
    ClassLoader cl = ClassLoader.getSystemClassLoader();
    URL[] urls = ((URLClassLoader) cl).getURLs();
    if (LOG.isDebugEnabled()) {
        for (URL url : urls) {
            LOG.debug(url.getFile());
        }
    }
    // _________________________________________

    String hdfsConnectionStr = configContext.getString("dataSourceConfig.hdfsConnnection");
    LOG.info("HDFS connection string: " + hdfsConnectionStr);

    String hdfsPath = configContext.getString("dataSourceConfig.hdfsPath");
    LOG.info("HDFS path: " + hdfsPath);

    String copyToPath = configContext.getString("dataSourceConfig.copyToPath");
    LOG.info("copyToPath: " + copyToPath);

    String srcPathStr = new String("hdfs://" + hdfsConnectionStr + hdfsPath);
    Path srcPath = new Path(srcPathStr);
    LOG.info("listFiles called");
    LOG.info("srcPath: " + srcPath);

    try {
        FileSystem fs = srcPath.getFileSystem(conf);
        /*CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
        CompressionCodec codec = codecFactory.getCodec(srcPath);
        DataInputStream inputStream = new DataInputStream(codec.createInputStream(fs.open(srcPath)));
        */
        Path destPath = new Path(copyToPath);
        LOG.info("Destination path: " + destPath);
        fs.copyToLocalFile(srcPath, destPath);
        LOG.info("Copy to local succeed");
        fs.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}