List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
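Before the project-specific examples below, here is a minimal self-contained sketch of the call itself: obtain a FileSystem from a Configuration, open an FSDataOutputStream with create(Path), write bytes, and close the stream. The target path /tmp/example.txt is only an illustrative placeholder, not taken from any example below; note that the single-argument create(Path) overwrites an existing file by default.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical target path; create(Path) overwrites an existing file by default
        Path target = new Path("/tmp/example.txt");
        FSDataOutputStream out = fs.create(target);
        try {
            out.write("hello hdfs".getBytes("UTF-8"));
        } finally {
            out.close();
        }
    }
}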
From source file:com.datatorrent.contrib.parser.CSVParserTest.java
License:Apache License
public void createFieldMappingFile() {
    FileSystem hdfs = null;
    // Creating a file in HDFS
    Path newFilePath = new Path(testMeta.getDir() + "/" + filename);
    try {
        hdfs = FileSystem.get(new Configuration());
        hdfs.createNewFile(newFilePath);
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }

    // Writing data to a HDFS file
    StringBuilder sb = new StringBuilder();
    sb.append("Eid");
    sb.append(":");
    sb.append("INTEGER");
    sb.append("\n");
    sb.append("Name");
    sb.append(":");
    sb.append("STRING");
    sb.append("\n");
    sb.append("Salary");
    sb.append(":");
    sb.append("LONG");
    sb.append("\n");
    byte[] byt = sb.toString().getBytes();
    try {
        FSDataOutputStream fsOutStream = hdfs.create(newFilePath);
        fsOutStream.write(byt);
        fsOutStream.close();
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }
    logger.debug("Written data to HDFS file.");
}
From source file:com.datatorrent.lib.bucket.HdfsBucketStore.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void storeBucketData(long window, long timestamp, Map<Integer, Map<Object, T>> data) throws IOException {
    Path dataFilePath = new Path(bucketRoot + PATH_SEPARATOR + window);
    FileSystem fs = FileSystem.newInstance(dataFilePath.toUri(), configuration);
    FSDataOutputStream dataStream = fs.create(dataFilePath);
    Output output = new Output(dataStream);
    try {
        long offset = 0;
        for (int bucketIdx : data.keySet()) {
            Map<Object, T> bucketData = data.get(bucketIdx);

            if (eventKeyClass == null) {
                Map.Entry<Object, T> eventEntry = bucketData.entrySet().iterator().next();
                eventKeyClass = eventEntry.getKey().getClass();
                if (!writeEventKeysOnly) {
                    @SuppressWarnings("unchecked")
                    Class<T> lEventClass = (Class<T>) eventEntry.getValue().getClass();
                    eventClass = lEventClass;
                }
            }

            // Write the size of data and then data
            dataStream.writeInt(bucketData.size());
            for (Map.Entry<Object, T> entry : bucketData.entrySet()) {
                writeSerde.writeObject(output, entry.getKey());

                if (!writeEventKeysOnly) {
                    int posLength = output.position();
                    output.writeInt(0); // temporary place holder
                    writeSerde.writeObject(output, entry.getValue());
                    int posValue = output.position();
                    int valueLength = posValue - posLength - 4;
                    output.setPosition(posLength);
                    output.writeInt(valueLength);
                    output.setPosition(posValue);
                }
            }
            output.flush();

            if (bucketPositions[bucketIdx] == null) {
                bucketPositions[bucketIdx] = Maps.newHashMap();
            }
            windowToBuckets.put(window, bucketIdx);
            windowToTimestamp.put(window, timestamp);
            synchronized (bucketPositions[bucketIdx]) {
                bucketPositions[bucketIdx].put(window, offset);
            }
            offset = dataStream.getPos();
        }
    } finally {
        output.close();
        dataStream.close();
        fs.close();
    }
}
From source file:com.datatorrent.stram.StreamingContainerManager.java
License:Apache License
/**
 * This method is for saving meta information about this application in HDFS -- the meta information that generally
 * does not change across multiple attempts
 */
private void saveMetaInfo() throws IOException {
    Path path = new Path(this.vars.appPath, APP_META_FILENAME + "." + System.nanoTime());
    FileSystem fs = FileSystem.newInstance(path.toUri(), new Configuration());
    try {
        FSDataOutputStream os = fs.create(path);
        try {
            JSONObject top = new JSONObject();
            JSONObject attributes = new JSONObject();
            for (Map.Entry<Attribute<?>, Object> entry : this.plan.getLogicalPlan().getAttributes().entrySet()) {
                attributes.put(entry.getKey().getSimpleName(), entry.getValue());
            }
            JSONObject customMetrics = new JSONObject();
            for (Map.Entry<String, Map<String, Object>> entry : latestLogicalMetrics.entrySet()) {
                customMetrics.put(entry.getKey(), new JSONArray(entry.getValue().keySet()));
            }
            top.put(APP_META_KEY_ATTRIBUTES, attributes);
            top.put(APP_META_KEY_CUSTOM_METRICS, customMetrics);
            os.write(top.toString().getBytes());
        } catch (JSONException ex) {
            throw new RuntimeException(ex);
        } finally {
            os.close();
        }
        Path origPath = new Path(this.vars.appPath, APP_META_FILENAME);
        fs.rename(path, origPath);
    } finally {
        fs.close();
    }
}
From source file:com.davidgildeh.hadoop.utils.FileUtils.java
License:Apache License
/**
 * Merges a list of input files in a directory to a single file under the
 * output path with a specified filename
 *
 * @param inputPath  The input directory containing all the input files. E.g. /input/dir/on/hdfs/
 * @param outputPath The output path to output the file. E.g. /output/dir/on/hdfs/filename
 * @throws IOException
 */
public static void mergeFiles(String inputPath, String outputPath) throws IOException {
    Path inputDir = new Path(inputPath);
    Path outputFile = new Path(outputPath);
    FileSystem fileSystem = getFileSystem(outputFile);
    checkFileExists(fileSystem, inputDir);

    // Check the input path is a directory
    if (!fileSystem.getFileStatus(inputDir).isDir()) {
        LOG.error("Path '" + inputDir.toString() + "' is not a directory.");
        throw new IOException("Path '" + inputDir.toString() + "' is not a directory.");
    }

    // Create Output File
    OutputStream out = fileSystem.create(outputFile);

    try {
        FileStatus contents[] = fileSystem.listStatus(inputDir);

        // Loop through all files in directory and merge them into one file
        for (int i = 0; i < contents.length; i++) {
            if (!contents[i].isDir()) {
                InputStream in = fileSystem.open(contents[i].getPath());
                try {
                    IOUtils.copyBytes(in, out, fileSystem.getConf(), false);
                } finally {
                    in.close();
                }
            }
        }
    } finally {
        out.close();
        fileSystem.close();
        LOG.info("Merged input files from '" + inputPath + "' to '" + outputPath + "'");
    }
}
From source file:com.digitalpebble.behemoth.util.ContentExtractor.java
License:Apache License
private int generateDocs(String inputf, String outputf) throws IOException, ArchiveException {
    Path input = new Path(inputf);
    Path dirPath = new Path(outputf);
    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());
    if (fsout.exists(dirPath) == false)
        fsout.mkdirs(dirPath);
    else {
        System.err.println("Output " + outputf + " already exists");
        return -1;
    }

    // index file
    Path indexPath = new Path(dirPath, "index");
    if (fsout.exists(indexPath) == false) {
        fsout.createNewFile(indexPath);
    }

    maxNumEntriesInArchive = getConf().getInt(numEntriesPerArchiveParamName, 10000);

    index = fsout.create(indexPath);

    createArchive(dirPath);

    FileSystem fs = input.getFileSystem(getConf());
    FileStatus[] statuses = fs.listStatus(input);
    int count[] = { 0 };
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        Path suPath = status.getPath();
        if (suPath.getName().equals("_SUCCESS"))
            continue;
        generateDocs(suPath, dirPath, count);
    }

    if (index != null)
        index.close();

    if (currentArchive != null) {
        currentArchive.finish();
        currentArchive.close();
    }

    return 0;
}
From source file:com.digitalpebble.behemoth.util.ContentExtractor.java
License:Apache License
private void createArchive(Path dirPath) throws IOException, ArchiveException {
    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());
    String archiveType = "zip";
    partNum++;
    FSDataOutputStream currentArchiveOS = fsout
            .create(new Path(dirPath, "part_" + String.format("%06d", partNum) + "." + archiveType));
    currentArchive = new ArchiveStreamFactory().createArchiveOutputStream(archiveType, currentArchiveOS);
    numEntriesInCurrentArchive = 0;
}
From source file:com.facebook.hiveio.output.HiveApiOutputCommitter.java
License:Apache License
/**
 * Write success file to Hadoop if required
 *
 * @param conf Configuration
 * @throws IOException I/O errors
 */
private static void writeSuccessFile(Configuration conf) throws IOException {
    if (!HadoopUtils.needSuccessMarker(conf)) {
        return;
    }
    Path outputPath = HadoopUtils.getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        Path successPath = new Path(outputPath, "_SUCCESS");
        if (!fs.exists(successPath)) {
            LOG.info("Writing success file to {}", successPath);
            fs.create(successPath).close();
        }
    }
}
From source file:com.facebook.presto.hive.OrcFileWriterFactory.java
License:Apache License
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isOrcOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    boolean isDwrf;
    if (OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = false;
    } else if (com.facebook.hive.orc.OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = true;
    } else {
        return Optional.empty();
    }

    CompressionKind compression = getCompression(schema, configuration);

    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()),
                            fileSystem.getFileStatus(path).getLen(), getOrcMaxMergeDistance(session),
                            getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false,
                            fileSystem.open(path), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new OrcFileWriter(outputStream, rollbackAction, isDwrf, fileColumnNames,
                fileColumnTypes, compression, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                hiveStorageTimeZone, validationInputFactory));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
From source file:com.facebook.presto.hive.RcFileFileWriterFactory.java
License:Apache License
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }

    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes,
                codecName, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                validationInputFactory));
    } catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
From source file:com.fanlehai.hadoop.serialize.json.multiline.ExampleJob.java
License:Apache License
/**
 * Writes the contents of {@link #JSON} into a file in the job input
 * directory in HDFS.
 *
 * @param conf
 *            the Hadoop config
 * @param inputDir
 *            the HDFS input directory where we'll write a file
 * @throws IOException
 *             if something goes wrong
 */
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        // throw new IOException(
        //     String.format("Input directory '%s' exists - please remove and rerun this example", inputDir));
        fs.delete(inputDir, true);
    }

    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));
    writer.write(JSON);
    IOUtils.closeStream(writer);
}