Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem create.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
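Before the project snippets below, here is a minimal, self-contained sketch of the call. It is not taken from any of the projects listed under Usage; the path /tmp/example.txt and the class name are illustrative, and it assumes a default Configuration on the classpath.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // picks up core-site.xml/hdfs-site.xml from the classpath, if present
        Path outPath = new Path("/tmp/example.txt"); // illustrative target path
        FileSystem fs = outPath.getFileSystem(conf); // resolves the FileSystem for the path's scheme

        // create(Path) creates missing parent directories and overwrites an existing file by default
        try (FSDataOutputStream out = fs.create(outPath)) {
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
    }
}

The examples that follow vary the surrounding details, but each one follows the same pattern: obtain a FileSystem, call create, write to the returned stream, and close it.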

Usage

From source file:com.mvdb.etl.actions.ActionUtils.java

License:Apache License

public static void writeStringToHdfsFile(String str, String hdfsFile) throws IOException {

    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);

    Path hdfsFilePath = new Path(hdfsFile);

    if (hdfsFileSystem.exists(hdfsFilePath)) {
        boolean deleteSuccess = hdfsFileSystem.delete(hdfsFilePath, true);
        if (!deleteSuccess) {
            throw new RuntimeException("Unable to delete " + hdfsFilePath.toString());
        }
    }

    if (hdfsFileSystem.exists(hdfsFilePath)) {
        throw new RuntimeException("Output " + hdfsFilePath + " already exists");
    }

    logger.info("Copy " + str + " in to " + hdfsFilePath.toString());

    FSDataOutputStream out = hdfsFileSystem.create(hdfsFilePath);
    byte[] bytes = str.getBytes();
    out.write(bytes, 0, bytes.length);
    out.close();

}

From source file:com.mycompany.app.TestStagingDirectoryPermissions.java

License:Apache License

@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

public void serialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);
    FSDataOutputStream out = fs.create(new Path(output, "naiveBayesModel.bin"));
    try {
        out.writeFloat(alphaI);
        VectorWritable.writeVector(out, weightsPerFeature);
        VectorWritable.writeVector(out, weightsPerLabel);
        VectorWritable.writeVector(out, perlabelThetaNormalizer);
        for (int row = 0; row < weightsPerLabelAndFeature.numRows(); row++) {
            VectorWritable.writeVector(out, weightsPerLabelAndFeature.viewRow(row));
        }
    } finally {
        Closeables.close(out, false);
    }
}

From source file:com.netease.news.utils.SplitInput.java

License:Apache License

/**
 * Perform a split on the specified input file. Results will be written to files of the same name in the specified
 * training and test output directories. The {@link #validate()} method is called prior to executing the split.
 */
public void splitFile(Path inputFile) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = inputFile.getFileSystem(conf);
    if (fs.getFileStatus(inputFile) == null) {
        throw new IOException(inputFile + " does not exist");
    }
    if (fs.getFileStatus(inputFile).isDir()) {
        throw new IOException(inputFile + " is a directory");
    }

    validate();

    Path testOutputFile = new Path(testOutputDirectory, inputFile.getName());
    Path trainingOutputFile = new Path(trainingOutputDirectory, inputFile.getName());

    int lineCount = countLines(fs, inputFile, charset);

    log.info("{} has {} lines", inputFile.getName(), lineCount);

    int testSplitStart = 0;
    int testSplitSize = this.testSplitSize; // don't modify state
    BitSet randomSel = null;

    if (testRandomSelectionPct > 0 || testRandomSelectionSize > 0) {
        testSplitSize = this.testRandomSelectionSize;

        if (testRandomSelectionPct > 0) {
            testSplitSize = Math.round(lineCount * testRandomSelectionPct / 100.0f);
        }
        log.info("{} test split size is {} based on random selection percentage {}", inputFile.getName(),
                testSplitSize, testRandomSelectionPct);
        long[] ridx = new long[testSplitSize];
        RandomSampler.sample(testSplitSize, lineCount - 1, testSplitSize, 0, ridx, 0, RandomUtils.getRandom());
        randomSel = new BitSet(lineCount);
        for (long idx : ridx) {
            randomSel.set((int) idx + 1);
        }
    } else {
        if (testSplitPct > 0) { // calculate split size based on percentage
            testSplitSize = Math.round(lineCount * testSplitPct / 100.0f);
            log.info("{} test split size is {} based on percentage {}", inputFile.getName(), testSplitSize,
                    testSplitPct);
        } else {
            log.info("{} test split size is {}", inputFile.getName(), testSplitSize);
        }

        if (splitLocation > 0) { // calculate start of split based on percentage
            testSplitStart = Math.round(lineCount * splitLocation / 100.0f);
            if (lineCount - testSplitStart < testSplitSize) {
                // adjust split start downwards based on split size.
                testSplitStart = lineCount - testSplitSize;
            }
            log.info("{} test split start is {} based on split location {}", inputFile.getName(),
                    testSplitStart, splitLocation);
        }

        if (testSplitStart < 0) {
            throw new IllegalArgumentException(
                    "test split size for " + inputFile + " is too large, it would produce an "
                            + "empty training set from the initial set of " + lineCount + " examples");
        } else if (lineCount - testSplitSize < testSplitSize) {
            log.warn(
                    "Test set size for {} may be too large, {} is larger than the number of "
                            + "lines remaining in the training set: {}",
                    inputFile, testSplitSize, lineCount - testSplitSize);
        }
    }
    int trainCount = 0;
    int testCount = 0;
    if (!useSequence) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset));
        Writer trainingWriter = new OutputStreamWriter(fs.create(trainingOutputFile), charset);
        Writer testWriter = new OutputStreamWriter(fs.create(testOutputFile), charset);

        try {

            String line;
            int pos = 0;
            while ((line = reader.readLine()) != null) {
                pos++;

                Writer writer;
                if (testRandomSelectionPct > 0) { // Randomly choose
                    writer = randomSel.get(pos) ? testWriter : trainingWriter;
                } else { // Choose based on location
                    writer = pos > testSplitStart ? testWriter : trainingWriter;
                }

                if (writer == testWriter) {
                    if (testCount >= testSplitSize) {
                        writer = trainingWriter;
                    } else {
                        testCount++;
                    }
                }
                if (writer == trainingWriter) {
                    trainCount++;
                }
                writer.write(line);
                writer.write('\n');
            }

        } finally {
            Closeables.close(reader, true);
            Closeables.close(trainingWriter, false);
            Closeables.close(testWriter, false);
        }
    } else {
        SequenceFileIterator<Writable, Writable> iterator = new SequenceFileIterator<Writable, Writable>(
                inputFile, false, fs.getConf());
        SequenceFile.Writer trainingWriter = SequenceFile.createWriter(fs, fs.getConf(), trainingOutputFile,
                iterator.getKeyClass(), iterator.getValueClass());
        SequenceFile.Writer testWriter = SequenceFile.createWriter(fs, fs.getConf(), testOutputFile,
                iterator.getKeyClass(), iterator.getValueClass());
        try {

            int pos = 0;
            while (iterator.hasNext()) {
                pos++;
                SequenceFile.Writer writer;
                if (testRandomSelectionPct > 0) { // Randomly choose
                    writer = randomSel.get(pos) ? testWriter : trainingWriter;
                } else { // Choose based on location
                    writer = pos > testSplitStart ? testWriter : trainingWriter;
                }

                if (writer == testWriter) {
                    if (testCount >= testSplitSize) {
                        writer = trainingWriter;
                    } else {
                        testCount++;
                    }
                }
                if (writer == trainingWriter) {
                    trainCount++;
                }
                Pair<Writable, Writable> pair = iterator.next();
                writer.append(pair.getFirst(), pair.getSecond());
            }

        } finally {
            Closeables.close(iterator, true);
            Closeables.close(trainingWriter, false);
            Closeables.close(testWriter, false);
        }
    }
    log.info("file: {}, input: {} train: {}, test: {} starting at {}", inputFile.getName(), lineCount,
            trainCount, testCount, testSplitStart);

    // testing;
    if (callback != null) {
        callback.splitComplete(inputFile, lineCount, trainCount, testCount, testSplitStart);
    }
}

From source file:com.netflix.aegisthus.tools.StorageHelper.java

License:Apache License

public void logCommit(String file) throws IOException {
    Path log = commitPath(getTaskId());
    if (debug) {
        LOG.info(String.format("logging (%s) to commit log (%s)", file, log.toUri().toString()));
    }
    FileSystem fs = log.getFileSystem(config);
    DataOutputStream os = null;
    if (fs.exists(log)) {
        os = fs.append(log);
    } else {
        os = fs.create(log);
    }
    os.writeBytes(file);
    os.write('\n');
    os.close();
}

From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java

License:Apache License

private static Path writeOutputFile(TaskAttemptID id, Path dest, String content, long copies)
        throws IOException {
    String fileName = ((id.getTaskType() == TaskType.REDUCE) ? "r_" : "m_") + id.getTaskID().getId() + "_"
            + id.getId() + "_" + UUID.randomUUID().toString();
    Path outPath = new Path(dest, fileName);
    FileSystem fs = outPath.getFileSystem(getConfiguration());

    try (OutputStream out = fs.create(outPath)) {
        byte[] bytes = content.getBytes(StandardCharsets.UTF_8);
        for (int i = 0; i < copies; i += 1) {
            out.write(bytes);
        }
    }

    return outPath;
}

From source file:com.netflix.bdp.s3.TestS3PartitionedFileListing.java

License:Apache License

@Test
public void testTaskOutputListing() throws Exception {
    S3PartitionedOutputCommitter committer = newTaskCommitter();

    // create files in the attempt path that should be found by getTaskOutput
    Path attemptPath = committer.getTaskAttemptPath(getTAC());
    FileSystem attemptFS = attemptPath.getFileSystem(getTAC().getConfiguration());
    attemptFS.delete(attemptPath, true);

    List<String> expectedFiles = Lists.newArrayList();
    for (String dateint : Arrays.asList("20161115", "20161116")) {
        for (String hour : Arrays.asList("13", "14")) {
            String relative = "dateint=" + dateint + "/hour=" + hour + "/" + UUID.randomUUID().toString()
                    + ".parquet";
            expectedFiles.add(relative);
            attemptFS.create(new Path(attemptPath, relative)).close();
        }
    }

    List<FileStatus> attemptFiles = committer.getTaskOutput(getTAC());
    List<String> actualFiles = Lists.newArrayList();
    for (FileStatus stat : attemptFiles) {
        String relative = getRelativePath(attemptPath, stat.getPath());
        actualFiles.add(relative);
    }

    Assert.assertEquals("File sets should match", expectedFiles, actualFiles);

    attemptFS.delete(attemptPath, true);
}

From source file:com.netflix.bdp.s3.TestS3PartitionedFileListing.java

License:Apache License

@Test
public void testTaskOutputListingWithHiddenFiles() throws Exception {
    S3PartitionedOutputCommitter committer = newTaskCommitter();

    // create files in the attempt path that should be found by getTaskOutput
    Path attemptPath = committer.getTaskAttemptPath(getTAC());
    FileSystem attemptFS = attemptPath.getFileSystem(getTAC().getConfiguration());
    attemptFS.delete(attemptPath, true);

    List<String> expectedFiles = Lists.newArrayList();
    for (String dateint : Arrays.asList("20161115", "20161116")) {
        String metadata = "dateint=" + dateint + "/" + "_metadata";
        attemptFS.create(new Path(attemptPath, metadata)).close();

        for (String hour : Arrays.asList("13", "14")) {
            String relative = "dateint=" + dateint + "/hour=" + hour + "/" + UUID.randomUUID().toString()
                    + ".parquet";
            expectedFiles.add(relative);
            attemptFS.create(new Path(attemptPath, relative)).close();

            String partial = "dateint=" + dateint + "/hour=" + hour + "/." + UUID.randomUUID().toString()
                    + ".partial";
            attemptFS.create(new Path(attemptPath, partial)).close();
        }
    }

    List<FileStatus> attemptFiles = committer.getTaskOutput(getTAC());
    List<String> actualFiles = Lists.newArrayList();
    for (FileStatus stat : attemptFiles) {
        String relative = getRelativePath(attemptPath, stat.getPath());
        actualFiles.add(relative);
    }

    Assert.assertEquals("File sets should match", expectedFiles, actualFiles);

    attemptFS.delete(attemptPath, true);
}

From source file:com.netflix.bdp.s3.TestUtil.java

License:Apache License

public static void createTestOutputFiles(List<String> relativeFiles, Path attemptPath, Configuration conf)
        throws Exception {
    // create files in the attempt path that should be found by getTaskOutput
    FileSystem attemptFS = attemptPath.getFileSystem(conf);
    attemptFS.delete(attemptPath, true);
    for (String relative : relativeFiles) {
        // 0-length files are ignored, so write at least one byte
        OutputStream out = attemptFS.create(new Path(attemptPath, relative));
        out.write(34);
        out.close();
    }
}

From source file:com.netflix.bdp.s3mper.alert.impl.AlertJanitor.java

License:Apache License

/**
 * Writes out logs to the given path as a separate JSON message per line.
 *
 * @param queue
 * @param path
 * @throws IOException 
 */
public void writeLogs(String queue, Path path) throws IOException {
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    DataOutputStream fout = fs.create(path);

    do {
        List<Message> messages = pull(queue, batchCount);

        if (messages.isEmpty()) {
            break;
        }

        for (Message m : messages) {
            fout.write((m.getBody().replaceAll("[\n|\r]", " ") + "\n").getBytes("UTF8"));
        }

        delete(queue, messages);
    } while (true);

    fout.close();
    fs.close();
}