List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
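Before the project-specific examples below, here is a minimal sketch of the basic call pattern: create(Path) returns an FSDataOutputStream that is written to and closed like any OutputStream, overwriting the file if it already exists. The class name and the path /tmp/fs-create-example.txt are illustrative assumptions, not part of any example below.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsCreateExample {
    public static void main(String[] args) throws IOException {
        // Resolve the default file system from the configuration (fs.defaultFS).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // create(Path) opens the file for writing and returns an FSDataOutputStream.
        Path path = new Path("/tmp/fs-create-example.txt"); // hypothetical path
        try (FSDataOutputStream out = fs.create(path)) {
            out.write("hello, hdfs".getBytes(StandardCharsets.UTF_8));
        }
    }
}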
From source file:com.firewallid.util.FIFile.java
public static void writeStringToHDFSFile(String pathFile, String text) throws IOException {
    Configuration hadoopConf = new Configuration();
    FileSystem fileSystem = FileSystem.get(hadoopConf);
    Path path = new Path(pathFile);
    if (fileSystem.exists(path)) {
        fileSystem.delete(path, true);
    }
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fileSystem.create(path)))) {
        bw.write(text);
    }
    LOG.info("Created file: " + pathFile);
}
From source file:com.fullcontact.sstable.index.SSTableIndexIndex.java
License:Apache License
/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate
 * chunks (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {
    final Configuration configuration = fileSystem.getConf();
    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;

    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = new IndexOffsetScanner(sstablePath, fileSystem);

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}
From source file:com.gameloft.bi.sparkplay.Main.java
public static void dynamicW() throws IOException {
    Path file = new Path("/test_io_dyn_001_rw");
    int nbRows = 20_000_000;
    int i = 0, bitCounter = 0, blockSize = 128 * 1024 * 1024;
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://bird001.buc.gameloft.org:9000");
    FileSystem fs = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    try (FSDataOutputStream out = fs.create(file)) {
        while (i < nbRows) {
            if (i % 2 == 0) {
                String msg = "sixty-nine";
                byte[] msgBytes = msg.getBytes();
                short rowLen = (short) (4 + 8 + 2 + msgBytes.length);
                // Bytes left before the next block boundary.
                int remaining = blockSize - (bitCounter % blockSize);
                if (remaining < rowLen) {
                    // Row does not fit in the current block: zero-pad up to the boundary.
                    bitCounter += remaining;
                    byte[] empty = new byte[remaining];
                    out.write(empty);
                } else {
                    bitCounter += rowLen;
                    out.writeShort(rowLen);
                    out.writeInt(69);
                    out.writeDouble(69.69);
                    out.writeShort(msgBytes.length);
                    out.write(msgBytes);
                }
            } else {
                String msg = "ninety-sixxx";
                byte[] msgBytes = msg.getBytes();
                short rowLen = (short) (4 + 2 + msgBytes.length + 8 + 4);
                bitCounter += rowLen;
                out.writeShort(rowLen);
                out.writeInt(96);
                out.writeShort(msgBytes.length);
                out.write(msgBytes);
                out.writeDouble(69.69);
                out.writeInt(96);
            }
            i++;
        }
    }
    System.out.println("write: " + (System.currentTimeMillis() - start));
}
From source file:com.gameloft.bi.sparkplay.Main.java
public static void simpleRW() throws IOException {
    Path file = new Path("/test_io_007_rw");
    int nbRows = 20_000_000;
    int i = 0;
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://bird001.buc.gameloft.org:9000");
    FileSystem fs = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    try (FSDataOutputStream out = fs.create(file)) {
        while (i < nbRows) {
            String msg = "sixty-nine";
            byte[] msgBytes = msg.getBytes();
            out.writeInt(69);
            out.writeDouble(69.69);
            out.writeShort(msgBytes.length);
            out.write(msgBytes);
            i++;
        }
    }
    System.out.println("write: " + (System.currentTimeMillis() - start));
}
From source file:com.gameloft.bi.sparkplay.Main.java
public static void writeCsvFile() throws IOException {
    int nbRows = 10_000_000;
    int i = 0;
    Path file = new Path("/test_io_csv_003_rw");
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://bird001.buc.gameloft.org:9000");
    FileSystem fs = FileSystem.get(conf);
    Random rand = new Random();
    try (FSDataOutputStream out = fs.create(file)) {
        StringBuffer buffer = new StringBuffer();
        while (i < nbRows) {
            buffer.append("param").append(rand.nextInt(20)).append(",");
            buffer.append("param").append(rand.nextInt(20)).append(",");
            buffer.append("param").append(rand.nextInt(20)).append("\n");
            i++;
            // Write the buffered rows to the stream roughly every 1000 rows.
            if (i % 1000 == 1) {
                out.write(buffer.toString().getBytes());
                buffer = new StringBuffer();
            }
        }
        if (buffer.length() > 0) {
            out.write(buffer.toString().getBytes());
        }
    }
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplogOrganizer.java
License:Apache License
/**
 * Creates an expiry marker for a file on the file system
 *
 * @param hoplog
 * @throws IOException
 */
protected void addExpiryMarkerForAFile(Hoplog hoplog) throws IOException {
    FileSystem fs = store.getFileSystem();

    // TODO optimization needed here. instead of creating expired marker
    // file per file, create a meta file. the main thing to worry is
    // compaction of meta file itself
    Path expiryMarker = getExpiryMarkerPath(hoplog.getFileName());

    // uh-oh, why are we trying to expire an already expired file?
    if (ENABLE_INTEGRITY_CHECKS) {
        Assert.assertTrue(!fs.exists(expiryMarker),
                "Expiry marker already exists: " + expiryMarker);
    }

    // The marker is an empty file; its existence alone flags the hoplog as expired.
    FSDataOutputStream expiryMarkerFile = fs.create(expiryMarker);
    expiryMarkerFile.close();

    if (logger.isDebugEnabled()) {
        logger.debug("Hoplog marked expired: " + getPathStr(hoplog));
    }
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
public static void exposeCleanupIntervalMillis(FileSystem fs, Path path, long intervalDurationMillis) {
    FSDataInputStream input = null;
    FSDataOutputStream output = null;
    try {
        if (fs.exists(path)) {
            input = new FSDataInputStream(fs.open(path));
            if (intervalDurationMillis == input.readLong()) {
                input.close();
                return;
            }
            input.close();
            fs.delete(path, true);
        }
        output = fs.create(path);
        output.writeLong(intervalDurationMillis);
        output.close();
    } catch (IOException e) {
        return;
    } finally {
        try {
            if (input != null) {
                input.close();
            }
            if (output != null) {
                output.close();
            }
        } catch (IOException e2) {
        }
    }
}
From source file:com.github.cbismuth.spark.utils.cluster.writer.AvroWriter.java
License:Open Source License
public <T> void write(final FileSystem fileSystem, final Schema schema, final Collection<T> objects,
        final RecordMapper<T> mapper, final String path) throws IOException {
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);

    try (final OutputStream outputStream = fileSystem.create(new Path(path));
         final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
         final DataFileWriter<GenericRecord> ignored = dataFileWriter.create(schema, outputStream)) {

        objects.forEach(object -> {
            try {
                final GenericRecord record = mapper.mapRecord(schema, object);
                dataFileWriter.append(record);
            } catch (final Exception e) {
                LOGGER.error(e.getMessage(), e);
                throw Throwables.propagate(e);
            }
        });
    }
}
From source file:com.github.hadoop.maven.plugin.deploy.DeployMojo.java
License:Apache License
@Override
public void execute() throws MojoExecutionException, MojoFailureException {
    if (this.path == null) {
        throw new MojoExecutionException("path property needs to be set for the plugin to work");
    }

    final File jarFile = new File(this.outputDirectory.getAbsolutePath() + File.separator
            + this.project.getArtifactId() + "-" + this.project.getVersion() + "-hdeploy.jar");
    InputStream is;
    try {
        is = new BufferedInputStream(new FileInputStream(jarFile));
    } catch (final FileNotFoundException e1) {
        throw new MojoExecutionException("The artifact was not found. Please run goal: hadoop:pack");
    }

    final Configuration conf = new Configuration();
    try {
        final FileSystem fs = FileSystem.get(URI.create(path), conf);
        final OutputStream out = fs.create(new Path(path, jarFile.getName()));
        IOUtils.copyBytes(is, out, conf);
        getLog().info("Successfully transferred artifact to " + path);
    } catch (final IOException e) {
        throw new MojoExecutionException("error while accessing hdfs");
    }
}
From source file:com.github.sakserv.minicluster.HdfsLocalClusterIntegrationTest.java
License:Apache License
@Test
public void testDfsClusterStart() throws IOException {
    // Write a file to HDFS containing the test string
    FileSystem hdfsFsHandle = dfsCluster.getHdfsFileSystemHandle();
    FSDataOutputStream writer = hdfsFsHandle
            .create(new Path(propertyParser.getProperty(ConfigVars.HDFS_TEST_FILE_KEY)));
    writer.writeUTF(propertyParser.getProperty(ConfigVars.HDFS_TEST_STRING_KEY));
    writer.close();

    // Read the file and compare to test string
    FSDataInputStream reader = hdfsFsHandle
            .open(new Path(propertyParser.getProperty(ConfigVars.HDFS_TEST_FILE_KEY)));
    assertEquals(reader.readUTF(), propertyParser.getProperty(ConfigVars.HDFS_TEST_STRING_KEY));
    reader.close();
    hdfsFsHandle.close();
}