List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
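Before the per-project examples below, a minimal sketch of the basic call pattern; the path and payload here are hypothetical and not taken from any of the projects listed:

public static void writeExample(FileSystem fs) throws IOException {
    Path out = new Path("/tmp/example.txt"); // hypothetical path
    // create() opens (or overwrites) the file and returns an FSDataOutputStream
    try (FSDataOutputStream os = fs.create(out)) {
        os.write("hello".getBytes());
    }
}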
From source file:com.liveramp.cascading_ext.bloom.BloomFilter.java
License:Apache License
public void writeOut(FileSystem fs, Path path) throws IOException {
    FSDataOutputStream out = fs.create(path);
    write(out);
    out.close();
}
From source file:com.liveramp.cascading_ext.FileSystemHelper.java
License:Apache License
public static void createFile(FileSystem fs, String path, String content) throws IOException {
    FSDataOutputStream os = fs.create(new Path(path));
    os.write(content.getBytes());
    os.close();
}
From source file:com.liveramp.hank.hadoop.HadoopTestCase.java
License:Apache License
protected void outputFile(FileSystem fs, String path, String output) throws IOException {
    FSDataOutputStream os = fs.create(new Path(path));
    os.write(output.getBytes());
    os.close();
}
From source file:com.metamx.milano.hadoop.MilanoProtoFileOutputFormat.java
License:Apache License
/**
 * Retrieve a record writer for this RecordWriter. There are three config properties that are supported:
 * com.metamx.milano.hadoop.filePrefix -- A string to prefix the written file names with.
 * com.metamx.milano.hadoop.filePath -- A string to postfix on the path. This lets you specify a subdirectory in which to put the files.
 * com.metamx.milano.proto.descriptor.base64 -- A string representing a base64 encoded DescriptorProto converted to bytes.
 *                                              This is overridden if the metadata has already been set.
 *
 * @param job The {@link TaskAttemptContext} to use. See above for specific options.
 *
 * @return A {@link RecordWriter}
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public RecordWriter<K, Message> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    log.debug(String.format("Retrieving record writer"));
    Configuration conf = job.getConfiguration();

    String prefix = conf.get("com.metamx.milano.hadoop.filePrefix", "");
    String path = conf.get("com.metamx.milano.hadoop.filePath", ".");

    if (metadata == null) {
        String descriptorBytes = conf.get("com.metamx.milano.proto.descriptor.base64");
        if (descriptorBytes != null) {
            metadata = MilanoTool.withBase64(descriptorBytes).getMetadata();
        }
    }

    String filename = "";
    if (!prefix.equals("")) {
        filename = filename.concat(prefix + "_");
    }
    filename = filename.concat(job.getTaskAttemptID().getTaskID().toString());

    Path directory = new Path(((FileOutputCommitter) getOutputCommitter(job)).getWorkPath(), path);
    Path file = new Path(directory, filename);
    FileSystem fs = file.getFileSystem(conf);
    final OutputStream outputStream = fs.create(file);

    return new RecordWriter<K, Message>() {
        private MilanoProtoFile.Writer writer = MilanoProtoFile.createWriter(outputStream, metadata);

        @Override
        public void write(K key, Message value) throws IOException, InterruptedException {
            writer.write(value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.flush();
            writer.close();
            log.debug("Closed Writer");
        }
    };
}
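As a hedged sketch of the job-side configuration read by the method above: only the three property keys come from the snippet; the values, the Job setup, and the output-format registration are assumptions.

// Hypothetical driver-side setup for MilanoProtoFileOutputFormat
Configuration conf = new Configuration();
conf.set("com.metamx.milano.hadoop.filePrefix", "events");   // assumed prefix for written file names
conf.set("com.metamx.milano.hadoop.filePath", "proto-out");  // assumed subdirectory under the work path
// conf.set("com.metamx.milano.proto.descriptor.base64", encodedDescriptor); // optional; ignored if metadata is already set
Job job = Job.getInstance(conf);
job.setOutputFormatClass(MilanoProtoFileOutputFormat.class);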
From source file:com.mongodb.hadoop.BSONFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) throws IOException {
    // Open data output stream
    Path outPath = getDefaultWorkFile(context, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(context.getConfiguration());
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(context.getConfiguration())) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(context.getConfiguration(), null);
    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}
From source file:com.mongodb.hadoop.hive.output.HiveBSONFileOutputFormat.java
License:Apache License
/**
 * create the final output file
 *
 * @param jc              the job configuration
 * @param fileOutputPath  the file that the output should be directed at
 * @param valueClass      the value class used to create
 * @param tableProperties the tableInfo for this file's corresponding table
 *
 * @return RecordWriter for the output file
 */
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path fileOutputPath,
        final Class<? extends Writable> valueClass, final boolean isCompressed,
        final Properties tableProperties, final Progressable progress) throws IOException {

    LOG.info("Output going into " + fileOutputPath);

    FileSystem fs = fileOutputPath.getFileSystem(jc);
    FSDataOutputStream outFile = fs.create(fileOutputPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(jc)) {
        Path splitPath = new Path(fileOutputPath.getParent(), "." + fileOutputPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(jc, null);
    return new HiveBSONFileRecordWriter(outFile, splitFile, splitSize);
}
From source file:com.mongodb.hadoop.mapred.BSONFileOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(final FileSystem ignored, final JobConf job, final String name,
        final Progressable progress) throws IOException {

    Path outPath = getDefaultWorkFile(job, name, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(job);
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(job)) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(job, null);
    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
/**
 * Get the input source files to be used as input for the backup mappers
 *
 * @param paths    the source paths to distribute across the input files
 * @param outputFs the filesystem to write the input source files to
 * @return the paths of the generated input source files
 * @throws IOException
 */
public Path[] createInputSources(List<Path> paths, FileSystem outputFs) throws IOException {
    int suggestedMapRedTasks = conf.getInt("mapred.map.tasks", 1);

    Path[] inputSources = new Path[suggestedMapRedTasks];
    for (int i = 0; i < inputSources.length; i++) {
        inputSources[i] = new Path(NAME + "-inputsource" + i + ".txt");
    }

    List<BufferedWriter> writers = new ArrayList<BufferedWriter>();
    int idx = 0;
    try {
        for (Path source : inputSources) {
            writers.add(new BufferedWriter(new OutputStreamWriter(outputFs.create(source))));
        }
        for (Path p : paths) {
            writers.get(idx).write(p.toString());
            writers.get(idx).newLine();

            idx++;
            if (idx >= inputSources.length) {
                idx = 0;
            }
        }
    } finally {
        for (BufferedWriter writer : writers) {
            checkAndClose(writer);
        }
    }

    return inputSources;
}
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
/**
 * Gets the truncate() call using reflection.
 *
 * <p>
 * Note: This code comes from Flume
 */
private Method reflectTruncate(FileSystem fs) {
    Method m = null;
    if (fs != null) {
        Class<?> fsClass = fs.getClass();
        try {
            m = fsClass.getMethod("truncate", Path.class, long.class);
        } catch (NoSuchMethodException ex) {
            LOG.debug("Truncate not found. Will write a file with suffix '{}' "
                    + " and prefix '{}' to specify how many bytes in a bucket are valid.",
                    validLengthSuffix, validLengthPrefix);
            return null;
        }

        // verify that truncate actually works
        FSDataOutputStream outputStream;
        Path testPath = new Path(UUID.randomUUID().toString());
        try {
            outputStream = fs.create(testPath);
            outputStream.writeUTF("hello");
            outputStream.close();
        } catch (IOException e) {
            LOG.error("Could not create file for checking if truncate works.", e);
            throw new RuntimeException("Could not create file for checking if truncate works.", e);
        }

        try {
            m.invoke(fs, testPath, 2);
        } catch (IllegalAccessException | InvocationTargetException e) {
            LOG.debug("Truncate is not supported.", e);
            m = null;
        }

        try {
            fs.delete(testPath, false);
        } catch (IOException e) {
            LOG.error("Could not delete truncate test file.", e);
            throw new RuntimeException("Could not delete truncate test file.", e);
        }
    }
    return m;
}
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override
public void restoreState(State<T> state) {
    this.state = state;

    FileSystem fs;
    try {
        fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    for (BucketState<T> bucketState : state.bucketStates.values()) {
        // we can clean all the pending files since they were renamed to final files
        // after this checkpoint was successful
        bucketState.pendingFiles.clear();

        if (bucketState.currentFile != null) {
            // We were writing to a file when the last checkpoint occurred. This file can either
            // be still in-progress or became a pending file at some point after the checkpoint.
            // Either way, we have to truncate it back to a valid state (or write a .valid-length
            // file that specifies up to which length it is valid) and rename it to the final name
            // before starting a new bucket file.
            Path partPath = new Path(bucketState.currentFile);
            try {
                Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                        .suffix(pendingSuffix);
                Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                        .suffix(inProgressSuffix);

                if (fs.exists(partPendingPath)) {
                    LOG.debug("In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                            partPath);
                    // has been moved to pending in the meantime, rename to final location
                    fs.rename(partPendingPath, partPath);
                } else if (fs.exists(partInProgressPath)) {
                    LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                    // it was still in progress, rename to final path
                    fs.rename(partInProgressPath, partPath);
                } else if (fs.exists(partPath)) {
                    LOG.debug("In-Progress file {} was already moved to final location {}.",
                            bucketState.currentFile, partPath);
                } else {
                    LOG.debug("In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                            + "it was moved to final location by a previous snapshot restore",
                            bucketState.currentFile);
                }

                refTruncate = reflectTruncate(fs);
                // truncate it or write a ".valid-length" file to specify up to which point it is valid
                if (refTruncate != null) {
                    LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                    // someone else might still hold the lease from a previous try, we are
                    // recovering, after all ...
                    if (fs instanceof DistributedFileSystem) {
                        DistributedFileSystem dfs = (DistributedFileSystem) fs;
                        LOG.debug("Trying to recover file lease {}", partPath);
                        dfs.recoverLease(partPath);
                        boolean isclosed = dfs.isFileClosed(partPath);
                        StopWatch sw = new StopWatch();
                        sw.start();
                        while (!isclosed) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            isclosed = dfs.isFileClosed(partPath);
                        }
                    }

                    Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath, bucketState.currentFileValidLength);
                    if (!truncated) {
                        LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                        // we must wait for the asynchronous truncate operation to complete
                        StopWatch sw = new StopWatch();
                        sw.start();
                        long newLen = fs.getFileStatus(partPath).getLen();
                        while (newLen != bucketState.currentFileValidLength) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            newLen = fs.getFileStatus(partPath).getLen();
                        }
                        if (newLen != bucketState.currentFileValidLength) {
                            throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                    + bucketState.currentFileValidLength + " is " + newLen + ".");
                        }
                    }
                } else {
                    LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                            bucketState.currentFileValidLength);
                    Path validLengthFilePath = new Path(partPath.getParent(),
                            validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                    if (!fs.exists(validLengthFilePath)) {
                        FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                        lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                        lengthFileOut.close();
                    }
                }

                // Now that we've restored the bucket to a valid state, reset the current file info
                bucketState.currentFile = null;
                bucketState.currentFileValidLength = -1;
            } catch (IOException e) {
                LOG.error("Error while restoring BucketingSink state.", e);
                throw new RuntimeException("Error while restoring BucketingSink state.", e);
            } catch (InvocationTargetException | IllegalAccessException e) {
                LOG.error("Could not invoke truncate.", e);
                throw new RuntimeException("Could not invoke truncate.", e);
            }
        }

        LOG.debug("Clearing pending/in-progress files.");

        // Move files that are confirmed by a checkpoint but did not get moved to final location
        // because the checkpoint notification did not happen before a failure
        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        LOG.debug("Moving pending files to final location on restore.");
        for (Long pastCheckpointId : pastCheckpointIds) {
            // All the pending files are buckets that have been completed but are waiting to be renamed
            // to their final name
            for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                Path finalPath = new Path(filename);
                Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                        .suffix(pendingSuffix);

                try {
                    if (fs.exists(pendingPath)) {
                        LOG.debug("(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                                pendingPath, pastCheckpointId);
                        fs.rename(pendingPath, finalPath);
                    }
                } catch (IOException e) {
                    LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                            pendingPath, finalPath, e);
                    throw new RuntimeException(
                            "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
                }
            }
        }

        synchronized (bucketState.pendingFilesPerCheckpoint) {
            bucketState.pendingFilesPerCheckpoint.clear();
        }
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    // delete pending files
    try {
        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}