List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
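Before the project-specific examples below, here is a minimal self-contained sketch of the call itself: obtain a FileSystem from a Configuration, open an FSDataOutputStream with create(Path), write bytes, and close the stream. The target path /tmp/example.txt is only an illustrative placeholder, not taken from any example below; note that the single-argument create(Path) overwrites an existing file by default.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical target path; create(Path) overwrites an existing file by default
        Path target = new Path("/tmp/example.txt");
        FSDataOutputStream out = fs.create(target);
        try {
            out.write("hello hdfs".getBytes("UTF-8"));
        } finally {
            out.close();
        }
    }
}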
From source file:com.datatorrent.contrib.parser.CSVParserTest.java
License:Apache License
public void createFieldMappingFile() {
    FileSystem hdfs = null;
    // Creating a file in HDFS
    Path newFilePath = new Path(testMeta.getDir() + "/" + filename);
    try {
        hdfs = FileSystem.get(new Configuration());
        hdfs.createNewFile(newFilePath);
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }

    // Writing data to a HDFS file
    StringBuilder sb = new StringBuilder();
    sb.append("Eid");
    sb.append(":");
    sb.append("INTEGER");
    sb.append("\n");
    sb.append("Name");
    sb.append(":");
    sb.append("STRING");
    sb.append("\n");
    sb.append("Salary");
    sb.append(":");
    sb.append("LONG");
    sb.append("\n");
    byte[] byt = sb.toString().getBytes();
    try {
        FSDataOutputStream fsOutStream = hdfs.create(newFilePath);
        fsOutStream.write(byt);
        fsOutStream.close();
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }
    logger.debug("Written data to HDFS file.");
}
From source file:com.datatorrent.lib.bucket.HdfsBucketStore.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void storeBucketData(long window, long timestamp, Map<Integer, Map<Object, T>> data) throws IOException {
    Path dataFilePath = new Path(bucketRoot + PATH_SEPARATOR + window);
    FileSystem fs = FileSystem.newInstance(dataFilePath.toUri(), configuration);
    FSDataOutputStream dataStream = fs.create(dataFilePath);
    Output output = new Output(dataStream);
    try {
        long offset = 0;
        for (int bucketIdx : data.keySet()) {
            Map<Object, T> bucketData = data.get(bucketIdx);

            if (eventKeyClass == null) {
                Map.Entry<Object, T> eventEntry = bucketData.entrySet().iterator().next();
                eventKeyClass = eventEntry.getKey().getClass();
                if (!writeEventKeysOnly) {
                    @SuppressWarnings("unchecked")
                    Class<T> lEventClass = (Class<T>) eventEntry.getValue().getClass();
                    eventClass = lEventClass;
                }
            }

            // Write the size of data and then data
            dataStream.writeInt(bucketData.size());
            for (Map.Entry<Object, T> entry : bucketData.entrySet()) {
                writeSerde.writeObject(output, entry.getKey());

                if (!writeEventKeysOnly) {
                    int posLength = output.position();
                    output.writeInt(0); // temporary place holder
                    writeSerde.writeObject(output, entry.getValue());
                    int posValue = output.position();
                    int valueLength = posValue - posLength - 4;
                    output.setPosition(posLength);
                    output.writeInt(valueLength);
                    output.setPosition(posValue);
                }
            }
            output.flush();

            if (bucketPositions[bucketIdx] == null) {
                bucketPositions[bucketIdx] = Maps.newHashMap();
            }
            windowToBuckets.put(window, bucketIdx);
            windowToTimestamp.put(window, timestamp);
            synchronized (bucketPositions[bucketIdx]) {
                bucketPositions[bucketIdx].put(window, offset);
            }
            offset = dataStream.getPos();
        }
    } finally {
        output.close();
        dataStream.close();
        fs.close();
    }
}
From source file:com.datatorrent.stram.StreamingContainerManager.java
License:Apache License
/**
 * This method is for saving meta information about this application in HDFS -- the meta information that generally
 * does not change across multiple attempts
 */
private void saveMetaInfo() throws IOException {
    Path path = new Path(this.vars.appPath, APP_META_FILENAME + "." + System.nanoTime());
    FileSystem fs = FileSystem.newInstance(path.toUri(), new Configuration());
    try {
        FSDataOutputStream os = fs.create(path);
        try {
            JSONObject top = new JSONObject();
            JSONObject attributes = new JSONObject();
            for (Map.Entry<Attribute<?>, Object> entry : this.plan.getLogicalPlan().getAttributes().entrySet()) {
                attributes.put(entry.getKey().getSimpleName(), entry.getValue());
            }
            JSONObject customMetrics = new JSONObject();
            for (Map.Entry<String, Map<String, Object>> entry : latestLogicalMetrics.entrySet()) {
                customMetrics.put(entry.getKey(), new JSONArray(entry.getValue().keySet()));
            }
            top.put(APP_META_KEY_ATTRIBUTES, attributes);
            top.put(APP_META_KEY_CUSTOM_METRICS, customMetrics);
            os.write(top.toString().getBytes());
        } catch (JSONException ex) {
            throw new RuntimeException(ex);
        } finally {
            os.close();
        }
        Path origPath = new Path(this.vars.appPath, APP_META_FILENAME);
        fs.rename(path, origPath);
    } finally {
        fs.close();
    }
}
From source file:com.davidgildeh.hadoop.utils.FileUtils.java
License:Apache License
/**
 * Merges a list of input files in a directory to a single file under the
 * output path with a specified filename
 *
 * @param inputPath  The input directory containing all the input files. E.g. /input/dir/on/hdfs/
 * @param outputPath The output path to output the file. E.g. /output/dir/on/hdfs/filename
 * @throws IOException
 */
public static void mergeFiles(String inputPath, String outputPath) throws IOException {
    Path inputDir = new Path(inputPath);
    Path outputFile = new Path(outputPath);
    FileSystem fileSystem = getFileSystem(outputFile);
    checkFileExists(fileSystem, inputDir);

    // Check the input path is a directory
    if (!fileSystem.getFileStatus(inputDir).isDir()) {
        LOG.error("Path '" + inputDir.toString() + "' is not a directory.");
        throw new IOException("Path '" + inputDir.toString() + "' is not a directory.");
    }

    // Create Output File
    OutputStream out = fileSystem.create(outputFile);

    try {
        FileStatus contents[] = fileSystem.listStatus(inputDir);

        // Loop through all files in directory and merge them into one file
        for (int i = 0; i < contents.length; i++) {
            if (!contents[i].isDir()) {
                InputStream in = fileSystem.open(contents[i].getPath());
                try {
                    IOUtils.copyBytes(in, out, fileSystem.getConf(), false);
                } finally {
                    in.close();
                }
            }
        }
    } finally {
        out.close();
        fileSystem.close();
        LOG.info("Merged input files from '" + inputPath + "' to '" + outputPath + "'");
    }
}
From source file:com.digitalpebble.behemoth.util.ContentExtractor.java
License:Apache License
private int generateDocs(String inputf, String outputf) throws IOException, ArchiveException {
    Path input = new Path(inputf);
    Path dirPath = new Path(outputf);
    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());
    if (fsout.exists(dirPath) == false)
        fsout.mkdirs(dirPath);
    else {
        System.err.println("Output " + outputf + " already exists");
        return -1;
    }

    // index file
    Path indexPath = new Path(dirPath, "index");
    if (fsout.exists(indexPath) == false) {
        fsout.createNewFile(indexPath);
    }

    maxNumEntriesInArchive = getConf().getInt(numEntriesPerArchiveParamName, 10000);

    index = fsout.create(indexPath);

    createArchive(dirPath);

    FileSystem fs = input.getFileSystem(getConf());
    FileStatus[] statuses = fs.listStatus(input);
    int count[] = { 0 };
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        Path suPath = status.getPath();
        if (suPath.getName().equals("_SUCCESS"))
            continue;
        generateDocs(suPath, dirPath, count);
    }

    if (index != null)
        index.close();

    if (currentArchive != null) {
        currentArchive.finish();
        currentArchive.close();
    }

    return 0;
}
From source file:com.digitalpebble.behemoth.util.ContentExtractor.java
License:Apache License
private void createArchive(Path dirPath) throws IOException, ArchiveException {
    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());
    String archiveType = "zip";
    partNum++;
    FSDataOutputStream currentArchiveOS = fsout
            .create(new Path(dirPath, "part_" + String.format("%06d", partNum) + "." + archiveType));
    currentArchive = new ArchiveStreamFactory().createArchiveOutputStream(archiveType, currentArchiveOS);
    numEntriesInCurrentArchive = 0;
}
From source file:com.facebook.hiveio.output.HiveApiOutputCommitter.java
License:Apache License
/**
 * Write success file to Hadoop if required
 *
 * @param conf Configuration
 * @throws IOException I/O errors
 */
private static void writeSuccessFile(Configuration conf) throws IOException {
    if (!HadoopUtils.needSuccessMarker(conf)) {
        return;
    }
    Path outputPath = HadoopUtils.getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        Path successPath = new Path(outputPath, "_SUCCESS");
        if (!fs.exists(successPath)) {
            LOG.info("Writing success file to {}", successPath);
            fs.create(successPath).close();
        }
    }
}
From source file:com.facebook.presto.hive.OrcFileWriterFactory.java
License:Apache License
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isOrcOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    boolean isDwrf;
    if (OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = false;
    } else if (com.facebook.hive.orc.OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = true;
    } else {
        return Optional.empty();
    }

    CompressionKind compression = getCompression(schema, configuration);

    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()),
                            fileSystem.getFileStatus(path).getLen(), getOrcMaxMergeDistance(session),
                            getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false,
                            fileSystem.open(path), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new OrcFileWriter(outputStream, rollbackAction, isDwrf, fileColumnNames,
                fileColumnTypes, compression, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                hiveStorageTimeZone, validationInputFactory));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
From source file:com.facebook.presto.hive.RcFileFileWriterFactory.java
License:Apache License
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }

    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes,
                codecName, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                validationInputFactory));
    } catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
From source file:com.fanlehai.hadoop.serialize.json.multiline.ExampleJob.java
License:Apache License
/**
 * Writes the contents of {@link #JSON} into a file in the job input
 * directory in HDFS.
 *
 * @param conf
 *            the Hadoop config
 * @param inputDir
 *            the HDFS input directory where we'll write a file
 * @throws IOException
 *             if something goes wrong
 */
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        // throw new IOException(
        //     String.format("Input directory '%s' exists - please remove and rerun this example", inputDir));
        fs.delete(inputDir, true);
    }

    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));
    writer.write(JSON);
    IOUtils.closeStream(writer);
}