Example usage for org.apache.hadoop.fs FileSystem create

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem.create(Path).

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
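
As a quick orientation, here is a minimal, self-contained sketch of the call shown in the prototype; the path, payload, and default Configuration are illustrative assumptions rather than values taken from any example below.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // hypothetical target path
        FileSystem fs = path.getFileSystem(conf);
        // create(Path) overwrites an existing file by default and returns an FSDataOutputStream.
        try (FSDataOutputStream out = fs.create(path)) {
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
    }
}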

Usage

From source file:com.liveramp.cascading_ext.bloom.BloomFilter.java

License:Apache License

public void writeOut(FileSystem fs, Path path) throws IOException {
    FSDataOutputStream out = fs.create(path);
    write(out);
    out.close();
}

From source file:com.liveramp.cascading_ext.FileSystemHelper.java

License:Apache License

public static void createFile(FileSystem fs, String path, String content) throws IOException {
    FSDataOutputStream os = fs.create(new Path(path));
    os.write(content.getBytes());
    os.close();
}

From source file:com.liveramp.hank.hadoop.HadoopTestCase.java

License:Apache License

protected void outputFile(FileSystem fs, String path, String output) throws IOException {
    FSDataOutputStream os = fs.create(new Path(path));
    os.write(output.getBytes());
    os.close();
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileOutputFormat.java

License:Apache License

/**
 * Retrieve a record writer for this OutputFormat. There are three config properties that are supported:
 * com.metamx.milano.hadoop.filePrefix -- A string to prefix the written file names with.
 * com.metamx.milano.hadoop.filePath   -- A string to postfix on the path. This lets you specify a subdirectory in which to put the files.
 * com.metamx.milano.proto.descriptor.base64 -- A string representing a base64 encoded DescriptorProto converted to bytes.
 * This is overridden if the metadata has already been set.
 *
 * @param job The {@link TaskAttemptContext} to use. See above for specific options.
 *
 * @return A {@link RecordWriter}
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public RecordWriter<K, Message> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    log.debug("Retrieving record writer");
    Configuration conf = job.getConfiguration();

    String prefix = conf.get("com.metamx.milano.hadoop.filePrefix", "");
    String path = conf.get("com.metamx.milano.hadoop.filePath", ".");

    if (metadata == null) {
        String descriptorBytes = conf.get("com.metamx.milano.proto.descriptor.base64");
        if (descriptorBytes != null) {
            metadata = MilanoTool.withBase64(descriptorBytes).getMetadata();
        }
    }

    String filename = "";
    if (!prefix.equals("")) {
        filename = filename.concat(prefix + "_");
    }
    filename = filename.concat(job.getTaskAttemptID().getTaskID().toString());
    Path directory = new Path(((FileOutputCommitter) getOutputCommitter(job)).getWorkPath(), path);

    Path file = new Path(directory, filename);
    FileSystem fs = file.getFileSystem(conf);

    final OutputStream outputStream = fs.create(file);

    return new RecordWriter<K, Message>() {
        private MilanoProtoFile.Writer writer = MilanoProtoFile.createWriter(outputStream, metadata);

        @Override
        public void write(K key, Message value) throws IOException, InterruptedException {
            writer.write(value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.flush();
            writer.close();
            log.debug("Closed Writer");
        }
    };
}
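
The javadoc above lists three configuration keys the format consults; the sketch below shows one way a driver might set them before requesting the writer. The values are placeholders, and producing the base64-encoded descriptor is assumed to happen elsewhere (the source itself decodes it with MilanoTool).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MilanoOutputFormatConfig {
    /** Sets the properties named in the javadoc above; all values here are placeholders. */
    public static void configure(Job job, String base64Descriptor) {
        Configuration conf = job.getConfiguration();
        conf.set("com.metamx.milano.hadoop.filePrefix", "events");   // prefixed onto written file names
        conf.set("com.metamx.milano.hadoop.filePath", "protos");     // subdirectory under the task work path
        // Only consulted when the writer's metadata has not already been set.
        conf.set("com.metamx.milano.proto.descriptor.base64", base64Descriptor);
    }
}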

From source file:com.mongodb.hadoop.BSONFileOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) throws IOException {
    // Open data output stream

    Path outPath = getDefaultWorkFile(context, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(context.getConfiguration());
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(context.getConfiguration())) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(context.getConfiguration(), null);
    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}

From source file:com.mongodb.hadoop.hive.output.HiveBSONFileOutputFormat.java

License:Apache License

/**
 * create the final output file
 *
 * @param jc              the job configuration
 * @param fileOutputPath  the file that the output should be directed at
 * @param valueClass      the value class used to create
 * @param tableProperties the tableInfo for this file's corresponding table
 * @return RecordWriter for the output file
 */
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path fileOutputPath,
        final Class<? extends Writable> valueClass, final boolean isCompressed,
        final Properties tableProperties, final Progressable progress) throws IOException {

    LOG.info("Output going into " + fileOutputPath);

    FileSystem fs = fileOutputPath.getFileSystem(jc);
    FSDataOutputStream outFile = fs.create(fileOutputPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(jc)) {
        Path splitPath = new Path(fileOutputPath.getParent(), "." + fileOutputPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(jc, null);

    return new HiveBSONFileRecordWriter(outFile, splitFile, splitSize);
}

From source file:com.mongodb.hadoop.mapred.BSONFileOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(final FileSystem ignored, final JobConf job, final String name,
        final Progressable progress) throws IOException {
    Path outPath = getDefaultWorkFile(job, name, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(job);
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(job)) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(job, null);

    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}

From source file:com.mozilla.hadoop.Backup.java

License:Apache License

/**
 * Get the input source files to be used as input for the backup mappers.
 *
 * @param paths    the paths to distribute across the generated input source files
 * @param outputFs the file system on which to create the input source files
 * @return the generated input source file paths
 * @throws IOException
 */
public Path[] createInputSources(List<Path> paths, FileSystem outputFs) throws IOException {
    int suggestedMapRedTasks = conf.getInt("mapred.map.tasks", 1);
    Path[] inputSources = new Path[suggestedMapRedTasks];
    for (int i = 0; i < inputSources.length; i++) {
        inputSources[i] = new Path(NAME + "-inputsource" + i + ".txt");
    }
    List<BufferedWriter> writers = new ArrayList<BufferedWriter>();
    int idx = 0;
    try {
        for (Path source : inputSources) {
            writers.add(new BufferedWriter(new OutputStreamWriter(outputFs.create(source))));
        }
        for (Path p : paths) {
            writers.get(idx).write(p.toString());
            writers.get(idx).newLine();

            idx++;
            if (idx >= inputSources.length) {
                idx = 0;
            }
        }
    } finally {
        for (BufferedWriter writer : writers) {
            checkAndClose(writer);
        }
    }

    return inputSources;
}

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

/**
 * Gets the truncate() call using reflection.
 *
 * <p>
 * Note: This code comes from Flume
 */
private Method reflectTruncate(FileSystem fs) {
    Method m = null;
    if (fs != null) {
        Class<?> fsClass = fs.getClass();
        try {
            m = fsClass.getMethod("truncate", Path.class, long.class);
        } catch (NoSuchMethodException ex) {
            LOG.debug(
                    "Truncate not found. Will write a file with suffix '{}' "
                            + " and prefix '{}' to specify how many bytes in a bucket are valid.",
                    validLengthSuffix, validLengthPrefix);
            return null;
        }

        // verify that truncate actually works
        FSDataOutputStream outputStream;
        Path testPath = new Path(UUID.randomUUID().toString());
        try {
            outputStream = fs.create(testPath);
            outputStream.writeUTF("hello");
            outputStream.close();
        } catch (IOException e) {
            LOG.error("Could not create file for checking if truncate works.", e);
            throw new RuntimeException("Could not create file for checking if truncate works.", e);
        }

        try {
            m.invoke(fs, testPath, 2);
        } catch (IllegalAccessException | InvocationTargetException e) {
            LOG.debug("Truncate is not supported.", e);
            m = null;
        }

        try {
            fs.delete(testPath, false);
        } catch (IOException e) {
            LOG.error("Could not delete truncate test file.", e);
            throw new RuntimeException("Could not delete truncate test file.", e);
        }
    }
    return m;
}

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

@Override
public void restoreState(State<T> state) {
    this.state = state;

    FileSystem fs;
    try {
        fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    for (BucketState<T> bucketState : state.bucketStates.values()) {
        // we can clean all the pending files since they were renamed to final files
        // after this checkpoint was successful
        bucketState.pendingFiles.clear();

        if (bucketState.currentFile != null) {
            // We were writing to a file when the last checkpoint occurred. This file can either
            // still be in-progress or may have become a pending file at some point after the checkpoint.
            // Either way, we have to truncate it back to a valid state (or write a .valid-length
            // file that specifies up to which length it is valid) and rename it to the final name
            // before starting a new bucket file.
            Path partPath = new Path(bucketState.currentFile);
            try {
                Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                        .suffix(pendingSuffix);
                Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                        .suffix(inProgressSuffix);

                if (fs.exists(partPendingPath)) {
                    LOG.debug(
                            "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                            partPath);
                    // has been moved to pending in the meantime, rename to final location
                    fs.rename(partPendingPath, partPath);
                } else if (fs.exists(partInProgressPath)) {
                    LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                    // it was still in progress, rename to final path
                    fs.rename(partInProgressPath, partPath);
                } else if (fs.exists(partPath)) {
                    LOG.debug("In-Progress file {} was already moved to final location {}.",
                            bucketState.currentFile, partPath);
                } else {
                    LOG.debug(
                            "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                    + "it was moved to final location by a previous snapshot restore",
                            bucketState.currentFile);
                }

                refTruncate = reflectTruncate(fs);
                // truncate it or write a ".valid-length" file to specify up to which point it is valid
                if (refTruncate != null) {
                    LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                    // someone else might still hold the lease from a previous try, we are
                    // recovering, after all ...
                    if (fs instanceof DistributedFileSystem) {
                        DistributedFileSystem dfs = (DistributedFileSystem) fs;
                        LOG.debug("Trying to recover file lease {}", partPath);
                        dfs.recoverLease(partPath);
                        boolean isclosed = dfs.isFileClosed(partPath);
                        StopWatch sw = new StopWatch();
                        sw.start();
                        while (!isclosed) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            isclosed = dfs.isFileClosed(partPath);
                        }
                    }
                    Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                            bucketState.currentFileValidLength);
                    if (!truncated) {
                        LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                        // we must wait for the asynchronous truncate operation to complete
                        StopWatch sw = new StopWatch();
                        sw.start();
                        long newLen = fs.getFileStatus(partPath).getLen();
                        while (newLen != bucketState.currentFileValidLength) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            newLen = fs.getFileStatus(partPath).getLen();
                        }
                        if (newLen != bucketState.currentFileValidLength) {
                            throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                    + bucketState.currentFileValidLength + " is " + newLen + ".");
                        }
                    }

                } else {
                    LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                            bucketState.currentFileValidLength);
                    Path validLengthFilePath = new Path(partPath.getParent(),
                            validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                    if (!fs.exists(validLengthFilePath)) {
                        FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                        lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                        lengthFileOut.close();
                    }
                }

                // Now that we've restored the bucket to a valid state, reset the current file info
                bucketState.currentFile = null;
                bucketState.currentFileValidLength = -1;
            } catch (IOException e) {
                LOG.error("Error while restoring BucketingSink state.", e);
                throw new RuntimeException("Error while restoring BucketingSink state.", e);
            } catch (InvocationTargetException | IllegalAccessException e) {
                LOG.error("Cound not invoke truncate.", e);
                throw new RuntimeException("Could not invoke truncate.", e);
            }
        }

        LOG.debug("Clearing pending/in-progress files.");

        // Move files that are confirmed by a checkpoint but did not get moved to final location
        // because the checkpoint notification did not happen before a failure

        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        LOG.debug("Moving pending files to final location on restore.");
        for (Long pastCheckpointId : pastCheckpointIds) {
            // All the pending files are buckets that have been completed but are waiting to be renamed
            // to their final name
            for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                Path finalPath = new Path(filename);
                Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                        .suffix(pendingSuffix);

                try {
                    if (fs.exists(pendingPath)) {
                        LOG.debug(
                                "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                                pendingPath, pastCheckpointId);
                        fs.rename(pendingPath, finalPath);
                    }
                } catch (IOException e) {
                    LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                            pendingPath, finalPath, e);
                    throw new RuntimeException(
                            "Error while renaming pending file " + pendingPath + " to final path " + finalPath,
                            e);
                }
            }
        }

        synchronized (bucketState.pendingFilesPerCheckpoint) {
            bucketState.pendingFilesPerCheckpoint.clear();
        }
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}
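
The valid-length branch above only writes the marker; a reader of the sink's output still has to honor it. The sketch below is a hedged illustration of that contract: the prefix and suffix literals are assumptions standing in for the sink's configured validLengthPrefix and validLengthSuffix, and the marker is read back with readUTF() because the sink writes it with writeUTF().

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ValidLengthReader {
    // Assumed defaults; substitute the sink's actual validLengthPrefix/validLengthSuffix.
    private static final String VALID_LENGTH_PREFIX = "_";
    private static final String VALID_LENGTH_SUFFIX = ".valid-length";

    /** Returns how many bytes of partPath are valid, or its full length when no marker exists. */
    public static long validLength(FileSystem fs, Path partPath) throws IOException {
        Path marker = new Path(partPath.getParent(),
                VALID_LENGTH_PREFIX + partPath.getName() + VALID_LENGTH_SUFFIX);
        if (!fs.exists(marker)) {
            return fs.getFileStatus(partPath).getLen();
        }
        try (FSDataInputStream in = fs.open(marker)) {
            // The sink stores the length as writeUTF(Long.toString(...)), so read it back the same way.
            return Long.parseLong(in.readUTF());
        }
    }
}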