List of usage examples for org.apache.hadoop.fs FileSystem append
public FSDataOutputStream append(Path f) throws IOException
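Most of the examples below follow the same basic pattern: append to an existing HDFS file if it is already there, otherwise create it. A minimal sketch of that pattern, assuming a default Configuration and a hypothetical /tmp/example.log path:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AppendExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.log"); // hypothetical path
        // Append when the file already exists, otherwise create it
        FSDataOutputStream out = fs.exists(path) ? fs.append(path) : fs.create(path);
        try {
            out.writeBytes("one more line\n");
        } finally {
            out.close();
        }
        fs.close();
    }
}

Note that append() only works on file systems that support it; on older HDFS versions it must be enabled with dfs.support.append.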
From source file:acromusashi.stream.bolt.hdfs.HdfsStreamWriter.java
License:Open Source License
/**
 * Opens the target HDFS file, appending if it already exists.
 *
 * @param filePath target HDFS file path
 * @param fs HDFS file system
 * @param isFileSyncEachTime whether to sync the file after each write
 * @throws IOException if the file cannot be opened
 */
public void open(String filePath, FileSystem fs, boolean isFileSyncEachTime) throws IOException {
    Path dstPath = new Path(filePath);

    if (fs.exists(dstPath)) {
        this.delegateStream = fs.append(dstPath);
    } else {
        this.delegateStream = fs.create(dstPath);
    }

    this.isFileSyncEachTime = isFileSyncEachTime;
}
From source file:cc.solr.lucene.store.hdfs.ConvertDirectory.java
License:Apache License
public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}
From source file:com.adsame.samelogs.SameLogsSink.java
License:Apache License
@SuppressWarnings("rawtypes") @Override//from w w w.j av a 2 s. co m public void append(Event e) throws IOException { // append the event to the output byte[] fn = e.get(TailSource.A_TAILSRCFILE); byte[] bd = e.getBody(); System.out.println("##" + new String(fn) + "##" + new String(bd)); Map<String, byte[]> maps = e.getAttrs(); Iterator iter = maps.entrySet().iterator(); while (iter.hasNext()) { Map.Entry entry = (Map.Entry) iter.next(); String key = (String) entry.getKey(); System.out.println("key: " + key); } // here we are assuming the body is a string pw.println(new String(e.getBody())); pw.flush(); // so we can see it in the file right away Configuration configuration = new Configuration(); FileSystem hdfsFileSystem = FileSystem.get(configuration); Path path = new Path("hdfs://nodie-Ubuntu4:9000/user/nodie/input/dfs/hello"); FSDataOutputStream out; System.out.println("exists: " + hdfsFileSystem.exists(path)); if (hdfsFileSystem.exists(path)) { out = hdfsFileSystem.append(path); } else { out = hdfsFileSystem.create(path); } out.write(e.getBody()); out.writeChar('\n'); out.flush(); out.close(); }
From source file:com.alibaba.jstorm.hdfs.spout.FileLock.java
License:Apache License
private FileLock(FileSystem fs, Path lockFile, String spoutId, LogEntry entry) throws IOException {
    this.fs = fs;
    this.lockFile = lockFile;
    this.lockFileStream = fs.append(lockFile);
    this.componentID = spoutId;
    LOG.info("Acquired abandoned lockFile {}, Spout {}", lockFile, spoutId);
    logProgress(entry.fileOffset, true);
}
From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java
License:Apache License
public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    //handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");
        outputFilename = outputParts[outputParts.length - 1];
        //remove outputFilename from outDirStr to get new outDirStr which is just directory (and trailing /)
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }

    //Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: "+hdfsBlockSize);

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!(hdfs.exists(inDir) || hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    if (!(hdfs.exists(outDir) || hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) { //If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    //Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    //compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; //set to deflate even though original is no compression
    }
    dfrFirstFile.close();

    //Create Empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out.println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                    + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    //Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    //Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));
    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);

    for (FileStatus thisFileStatus : inputFileList) {
        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());
        dfw.appendAllFrom(dfr, false);
        dfr.close();
    }

    dfw.close();
    dfwBase.close();
}
From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java
License:Open Source License
private void testAppend() throws Exception {
    FileSystem fs = FileSystem.get(getHadoopConf());
    Path path = new Path(getHadoopTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    fs.close();

    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    fs = FileSystem.get(getJettyURL().toURI(), conf);
    os = fs.append(new Path(path.toUri().getPath()));
    os.write(2);
    os.close();
    fs.close();

    fs = FileSystem.get(getHadoopConf());
    InputStream is = fs.open(path);
    Assert.assertEquals(is.read(), 1);
    Assert.assertEquals(is.read(), 2);
    Assert.assertEquals(is.read(), -1);
    is.close();
    fs.close();
}
From source file:com.cloudera.nav.plugin.client.writer.MetadataWriterFactory.java
License:Apache License
private OutputStream createHdfsStream(PluginConfigurations config) {
    try {
        FileSystem fs = FileSystem.get(config.getHadoopConfigurations());
        Path path = new Path(getFilePath(config.getMetadataParentUriString()));
        if (fs.exists(path)) {
            return fs.append(path);
        }
        // TODO block sizes, replication counts etc
        return fs.create(path);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.cloudera.nav.sdk.client.writer.MetadataWriterFactory.java
License:Apache License
private OutputStream createHdfsStream() {
    try {
        FileSystem fs = FileSystem.get(config.getHadoopConfigurations());
        Path path = new Path(getFilePath(config.getMetadataParentUriString()));
        if (fs.exists(path)) {
            return fs.append(path);
        }
        // TODO block sizes, replication counts etc
        return fs.create(path);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.example.sparkservice.SparkService.java
/**
 * Appends a mail that the server classified incorrectly to the correct dataset.
 * The datasets are two files: one containing all mails classified as HAM,
 * the other all mails classified as SPAM.
 *
 * @param uri Path of the dataset
 * @param mail Text of the mail
 */
public void appendData(String uri, String mail) {
    Configuration config = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(URI.create(uri), config);
        FSDataOutputStream fsout = fs.append(new Path(uri));
        PrintWriter writer = new PrintWriter(fsout);
        writer.append(mail.replace('\n', ' ').replace('\r', ' ') + "\n");
        writer.close();
        fs.close();
    } catch (IOException ex) {
        Logger.getLogger(SparkService.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:com.huayu.metis.flume.sink.hdfs.HDFSSequenceFile.java
License:Apache License
@Override
public void open(String filePath) throws IOException {
    Configuration conf = new Configuration();
    Path dstPath = new Path(filePath);
    FileSystem fileSystem = dstPath.getFileSystem(conf);
    // On Hadoop 2.2, append requires dfs.append.support to be enabled
    if (fileSystem.exists(dstPath) && fileSystem.isFile(dstPath)) {
        outStream = fileSystem.append(dstPath);
    } else {
        outStream = fileSystem.create(dstPath);
    }
    writer = SequenceFile.createWriter(conf, SequenceFile.Writer.stream(outStream),
            SequenceFile.Writer.keyClass(serializer.getKeyClass()),
            SequenceFile.Writer.valueClass(serializer.getValueClass()));
    registerCurrentStream(outStream, fileSystem, dstPath);
}