List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
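Before the collected examples, a minimal sketch of the pattern they all share: obtain a FileSystem from a Configuration, call create(Path) to get an FSDataOutputStream, write, and close. The class name CreateExample and the path /tmp/example.txt are illustrative assumptions, not taken from any of the sources below.

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CreateExample {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            // Resolve the FileSystem implementation (HDFS, local, ...) from the configuration.
            FileSystem fs = FileSystem.get(conf);
            Path path = new Path("/tmp/example.txt"); // hypothetical path, for illustration only
            // create() opens (and overwrites) the file and returns a stream to write to.
            try (FSDataOutputStream out = fs.create(path)) {
                out.write("hello hdfs".getBytes(StandardCharsets.UTF_8));
            }
        }
    }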
From source file:com.twitter.algebra.nmf.DistRndMatrixJob.java
License:Apache License
public static DistributedRowMatrix random(Configuration conf, int rows, int cols, Path tmpPath, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputPath = new Path(tmpPath, "null-in");
    Path outputPath = new Path(tmpPath, "Random-" + label + "-" + rows + "-" + cols);
    FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
    DistRndMatrixJob job = new DistRndMatrixJob();
    if (!fs.exists(inputPath)) {
        FSDataOutputStream inFile = fs.create(inputPath);
        inFile.write("NullValue".getBytes());
        inFile.close();
    }
    if (!fs.exists(outputPath)) {
        job.run(conf, inputPath, outputPath, rows, cols);
    } else {
        log.warn("----------- Skip already exists: " + outputPath);
    }
    DistributedRowMatrix distRes = new DistributedRowMatrix(outputPath, tmpPath, rows, cols);
    distRes.setConf(conf);
    return distRes;
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
/**
 * Write \n separated lines of text to HDFS as UTF-8.
 */
public static void writeLines(FileSystem fs, Path path, Iterable<String> lines) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(path);
    Preconditions.checkNotNull(lines);

    Writer stream = new BufferedWriter(new OutputStreamWriter(fs.create(path), "UTF-8"));
    try {
        for (String line : lines) {
            stream.write(line + "\n");
        }
    } finally {
        stream.close();
    }
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
/**
 * Copy the file to HDFS in a temporary path.
 * @param pigContext the pig context
 * @param conf the job conf
 * @param url the url to ship to hdfs
 * @return the location where it was shipped
 * @throws IOException
 */
private static Path shipToHDFS(PigContext pigContext, Configuration conf, URL url) throws IOException {
    String path = url.getPath();
    int slash = path.lastIndexOf("/");
    String suffix = slash == -1 ? path : path.substring(slash + 1);

    Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toUri().getPath(), suffix);
    FileSystem fs = dst.getFileSystem(conf);
    OutputStream os = fs.create(dst);
    try {
        IOUtils.copyBytes(url.openStream(), os, 4096, true);
    } finally {
        // IOUtils can not close both the input and the output properly in a finally
        // as we can get an exception in between opening the stream and calling the method
        os.close();
    }
    return dst;
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezLauncher.java
License:Apache License
private void createSuccessFile(Job job, POStore store) throws IOException {
    if (shouldMarkOutputDir(job)) {
        Path outputPath = new Path(store.getSFile().getFileName());
        FileSystem fs = outputPath.getFileSystem(job.getJobConf());
        if (fs.exists(outputPath)) {
            // create a file in the folder to mark it
            Path filePath = new Path(outputPath, SUCCEEDED_FILE_NAME);
            if (!fs.exists(filePath)) {
                fs.create(filePath).close();
            }
        }
    }
}
From source file:com.uber.hoodie.common.table.HoodieTableConfig.java
License:Apache License
/**
 * Initialize the hoodie meta directory and any necessary files inside the meta
 * (including the hoodie.properties).
 */
public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties)
        throws IOException {
    if (!fs.exists(metadataFolder)) {
        fs.mkdirs(metadataFolder);
    }
    Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
    try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
        if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
            throw new IllegalArgumentException(HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
        }
        if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
            properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
        }
        // compare string contents with equals() rather than reference equality (==)
        if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME).equals(HoodieTableType.MERGE_ON_READ.name())
                && !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
            properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
        }
        if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
            properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
        }
        properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
    }
}
From source file:com.vf.flume.sink.hdfs.HDFSDataStream.java
License:Apache License
protected void doOpen(Configuration conf, Path dstPath, FileSystem hdfs) throws IOException {
    if (useRawLocalFileSystem) {
        if (hdfs instanceof LocalFileSystem) {
            hdfs = ((LocalFileSystem) hdfs).getRaw();
        } else {
            logger.warn("useRawLocalFileSystem is set to true but file system "
                    + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
        }
    }

    boolean appending = false;
    // Append support is assumed here; this fork drops the upstream Flume check of
    // conf.getBoolean("hdfs.append.support", false) and always appends to existing files.
    if (hdfs.isFile(dstPath)) {
        outStream = hdfs.append(dstPath);
        appending = true;
    } else {
        outStream = hdfs.create(dstPath);
    }

    serializer = EventSerializerFactory.getInstance(serializerType, serializerContext, outStream);
    if (appending && !serializer.supportsReopen()) {
        outStream.close();
        serializer = null;
        throw new IOException("serializer (" + serializerType + ") does not support append");
    }

    // must call superclass to check for replication issues
    registerCurrentStream(outStream, hdfs, dstPath);

    if (appending) {
        serializer.afterReopen();
    } else {
        serializer.afterCreate();
    }
}
From source file:com.yahoo.glimmer.util.MergeSortTool.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(MergeSortTool.class.getName(),
            "Merges alpha numerically sorted text files on HDFS",
            new Parameter[] {
                    new FlaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'i',
                            INPUT_ARG, "input filenames glob eg. .../part-r-?????/sortedlines.text"),
                    new FlaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                            OUTPUT_ARG, "output filename"),
                    new FlaggedOption(COUNT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c',
                            COUNT_ARG,
                            "optionally create a file containing a count of the number of lines merged in text"), });

    JSAPResult jsapResult = jsap.parse(args);
    if (!jsapResult.success()) {
        System.err.print(jsap.getUsage());
        System.exit(1);
    }

    // Maybe quicker to use a MR job with one reducer.. Currently
    // decompression, merge and compression are all done in this thread..

    Path inputGlobPath = new Path(jsapResult.getString(INPUT_ARG));

    Configuration config = getConf();
    FileSystem fs = FileSystem.get(config);

    FileStatus[] sources = fs.globStatus(inputGlobPath);
    if (sources.length == 0) {
        System.err.println("No files matching input glob:" + inputGlobPath.toString());
        return 1;
    }

    List<Path> sourcePaths = new ArrayList<Path>(sources.length);
    for (FileStatus source : sources) {
        if (source.isDirectory()) {
            System.err.println(source.getPath().toString() + " is a directory.");
            return 1;
        }
        sourcePaths.add(source.getPath());
    }

    Path outputPath = new Path(jsapResult.getString(OUTPUT_ARG));

    CompressionCodecFactory factory = new CompressionCodecFactory(config);

    FSDataOutputStream countsOutputStream = null;
    if (jsapResult.contains(COUNT_ARG)) {
        Path countsPath = new Path(jsapResult.getString(COUNT_ARG));
        countsOutputStream = fs.create(countsPath);
    }

    int lineCount = MergeSortTool.mergeSort(fs, sourcePaths, outputPath, factory);
    System.out.println("Merged " + lineCount + " lines into " + outputPath.toString());

    if (countsOutputStream != null) {
        countsOutputStream.writeBytes("" + lineCount + '\n');
        // flush and close only when the counts file was requested,
        // otherwise these calls would throw a NullPointerException
        countsOutputStream.flush();
        countsOutputStream.close();
    }

    return 0;
}
From source file:com.yahoo.glimmer.util.MergeSortTool.java
License:Open Source License
public static int mergeSort(FileSystem fs, List<Path> sourcePaths, Path outputPath,
        CompressionCodecFactory compressionCodecFactory) throws IOException {
    assert sourcePaths.size() > 0 : "No source paths given.";
    LOG.info("Sorted merge into " + outputPath.toString());
    OutputStream outputStream = fs.create(outputPath);

    CompressionCodec inputCompressionCodec = compressionCodecFactory.getCodec(sourcePaths.get(0));
    if (inputCompressionCodec != null) {
        LOG.info("Input compression codec " + inputCompressionCodec.getClass().getName());
    }

    CompressionCodec outputCompressionCodec = compressionCodecFactory.getCodec(outputPath);
    if (outputCompressionCodec != null) {
        LOG.info("Output compression codec " + outputCompressionCodec.getClass().getName());
        outputStream = outputCompressionCodec.createOutputStream(outputStream);
    }

    List<BufferedReader> readers = new ArrayList<BufferedReader>();
    OutputStreamWriter writer = new OutputStreamWriter(outputStream);

    for (Path partPath : sourcePaths) {
        LOG.info("\tAdding source " + partPath.toString());
        InputStream inputStream = fs.open(partPath);
        if (inputCompressionCodec != null) {
            inputStream = inputCompressionCodec.createInputStream(inputStream);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        readers.add(reader);
    }

    int count = ReadersWriterMergeSort.mergeSort(readers, writer);

    writer.close();
    for (BufferedReader reader : readers) {
        reader.close();
    }
    readers.clear();

    LOG.info("Processed " + count + " lines into " + outputPath.toString());
    return count;
}
From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSourceTest.java
License:Apache License
private void writeSimpleFiles(String path, String ext, int numOfFiles) {
    // get filesystem
    FileSystem dfs;
    try {
        dfs = hdfsCluster.getFileSystem();
    } catch (IOException ioe) {
        fail("Could not access MiniDFSCluster" + ioe.getMessage());
        return;
    }

    // create basedir
    Path basedir = new Path(path);
    try {
        dfs.mkdirs(basedir);
    } catch (IOException ioe) {
        fail("Could not create DIR:" + path + "\n" + ioe.getMessage());
        return;
    }

    // write files
    for (int i = 1; i <= numOfFiles; i++) {
        String fn = null;
        if (ext != null) {
            fn = Integer.toString(i) + "." + ext;
        } else {
            fn = Integer.toString(i);
        }

        try {
            OutputStream fin = dfs.create(new Path(path, fn));
            BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(fin));
            wr.write(Integer.toString(i));
            wr.close();
            fin.close();
        } catch (IOException ioe) {
            fail("Fail writing to input file: " + fn + " in directory: " + path + ioe.getMessage());
        }
    }
}
From source file:com.yahoo.spaclu.data.extract.ExtractFeatureSpark.java
License:Apache License
/**
 * @deprecated
 */
public static boolean writeToHDFS(Object object, String fileName) {
    // Create a default hadoop configuration
    Configuration conf = new Configuration();
    // Specifies a new file in HDFS.
    Path filenamePath = new Path(fileName);

    try {
        // Parse created config to the HDFS
        FileSystem fs = FileSystem.get(conf);

        // fail if the file already exists
        if (fs.exists(filenamePath)) {
            throw new IOException("File already exists: " + filenamePath);
        }

        FSDataOutputStream fos = fs.create(filenamePath);
        ObjectOutputStream oos = new ObjectOutputStream(fos);
        oos.writeObject(object);

        // close the object stream first so its buffer is flushed
        // before the underlying HDFS stream is closed
        oos.close();
        fos.close();

        return true;
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return false;
    }
}