List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
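Before the project-specific examples below, here is a minimal, self-contained sketch of the usual create-write-close pattern. The class name, target path, and payload are hypothetical and not taken from any of the sources listed:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical target path; create(Path) overwrites an existing file by default.
        Path out = new Path("/tmp/create-example.txt");
        try (FSDataOutputStream stream = fs.create(out)) {
            stream.write("hello".getBytes(StandardCharsets.UTF_8));
        }
    }
}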
From source file:com.mvdb.etl.actions.ActionUtils.java
License:Apache License
public static void writeStringToHdfsFile(String str, String hdfsFile) throws IOException {
    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER, ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);
    if (hdfsFileSystem.exists(hdfsFilePath)) {
        boolean deleteSuccess = hdfsFileSystem.delete(hdfsFilePath, true);
        if (!deleteSuccess) {
            throw new RuntimeException("Unable to delete " + hdfsFilePath.toString());
        }
    }
    if (hdfsFileSystem.exists(hdfsFilePath)) {
        throw new RuntimeException("Output " + hdfsFilePath + " already exists");
    }
    logger.info("Copy " + str + " in to " + hdfsFilePath.toString());
    FSDataOutputStream out = hdfsFileSystem.create(hdfsFilePath);
    byte[] bytes = str.getBytes();
    out.write(bytes, 0, bytes.length);
    out.close();
}
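A note on the example above (an observation, not part of the original source): FileSystem.create(Path) overwrites an existing file by default, so the exists/delete steps mainly serve to recursively remove a pre-existing directory at that path and to surface a clear error. The two-argument overload states the overwrite intent explicitly:

// Hypothetical variant: the second argument makes the overwrite behaviour explicit.
FSDataOutputStream out = hdfsFileSystem.create(hdfsFilePath, true);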
From source file:com.mycompany.app.TestStagingDirectoryPermissions.java
License:Apache License
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);

        JobConf job = new JobConf(minimr.getConfig());
        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(MySleepMapper.class);
        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");
        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
public void serialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);
    FSDataOutputStream out = fs.create(new Path(output, "naiveBayesModel.bin"));
    try {
        out.writeFloat(alphaI);
        VectorWritable.writeVector(out, weightsPerFeature);
        VectorWritable.writeVector(out, weightsPerLabel);
        VectorWritable.writeVector(out, perlabelThetaNormalizer);
        for (int row = 0; row < weightsPerLabelAndFeature.numRows(); row++) {
            VectorWritable.writeVector(out, weightsPerLabelAndFeature.viewRow(row));
        }
    } finally {
        Closeables.close(out, false);
    }
}
From source file:com.netease.news.utils.SplitInput.java
License:Apache License
/**
 * Perform a split on the specified input file. Results will be written to files of the same name in the
 * specified training and test output directories. The {@link #validate()} method is called prior to
 * executing the split.
 */
public void splitFile(Path inputFile) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = inputFile.getFileSystem(conf);
    if (fs.getFileStatus(inputFile) == null) {
        throw new IOException(inputFile + " does not exist");
    }
    if (fs.getFileStatus(inputFile).isDir()) {
        throw new IOException(inputFile + " is a directory");
    }

    validate();

    Path testOutputFile = new Path(testOutputDirectory, inputFile.getName());
    Path trainingOutputFile = new Path(trainingOutputDirectory, inputFile.getName());

    int lineCount = countLines(fs, inputFile, charset);

    log.info("{} has {} lines", inputFile.getName(), lineCount);

    int testSplitStart = 0;
    int testSplitSize = this.testSplitSize; // don't modify state
    BitSet randomSel = null;

    if (testRandomSelectionPct > 0 || testRandomSelectionSize > 0) {
        testSplitSize = this.testRandomSelectionSize;

        if (testRandomSelectionPct > 0) {
            testSplitSize = Math.round(lineCount * testRandomSelectionPct / 100.0f);
        }
        log.info("{} test split size is {} based on random selection percentage {}", inputFile.getName(),
                testSplitSize, testRandomSelectionPct);
        long[] ridx = new long[testSplitSize];
        RandomSampler.sample(testSplitSize, lineCount - 1, testSplitSize, 0, ridx, 0, RandomUtils.getRandom());
        randomSel = new BitSet(lineCount);
        for (long idx : ridx) {
            randomSel.set((int) idx + 1);
        }
    } else {
        if (testSplitPct > 0) { // calculate split size based on percentage
            testSplitSize = Math.round(lineCount * testSplitPct / 100.0f);
            log.info("{} test split size is {} based on percentage {}", inputFile.getName(), testSplitSize,
                    testSplitPct);
        } else {
            log.info("{} test split size is {}", inputFile.getName(), testSplitSize);
        }

        if (splitLocation > 0) { // calculate start of split based on percentage
            testSplitStart = Math.round(lineCount * splitLocation / 100.0f);
            if (lineCount - testSplitStart < testSplitSize) {
                // adjust split start downwards based on split size.
                testSplitStart = lineCount - testSplitSize;
            }
            log.info("{} test split start is {} based on split location {}", inputFile.getName(),
                    testSplitStart, splitLocation);
        }

        if (testSplitStart < 0) {
            throw new IllegalArgumentException("test split size for " + inputFile + " is too large, it would produce an "
                    + "empty training set from the initial set of " + lineCount + " examples");
        } else if (lineCount - testSplitSize < testSplitSize) {
            log.warn("Test set size for {} may be too large, {} is larger than the number of "
                    + "lines remaining in the training set: {}", inputFile, testSplitSize, lineCount - testSplitSize);
        }
    }

    int trainCount = 0;
    int testCount = 0;
    if (!useSequence) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset));
        Writer trainingWriter = new OutputStreamWriter(fs.create(trainingOutputFile), charset);
        Writer testWriter = new OutputStreamWriter(fs.create(testOutputFile), charset);
        try {
            String line;
            int pos = 0;
            while ((line = reader.readLine()) != null) {
                pos++;

                Writer writer;
                if (testRandomSelectionPct > 0) { // Randomly choose
                    writer = randomSel.get(pos) ? testWriter : trainingWriter;
                } else { // Choose based on location
                    writer = pos > testSplitStart ? testWriter : trainingWriter;
                }

                if (writer == testWriter) {
                    if (testCount >= testSplitSize) {
                        writer = trainingWriter;
                    } else {
                        testCount++;
                    }
                }
                if (writer == trainingWriter) {
                    trainCount++;
                }
                writer.write(line);
                writer.write('\n');
            }
        } finally {
            Closeables.close(reader, true);
            Closeables.close(trainingWriter, false);
            Closeables.close(testWriter, false);
        }
    } else {
        SequenceFileIterator<Writable, Writable> iterator = new SequenceFileIterator<Writable, Writable>(
                inputFile, false, fs.getConf());
        SequenceFile.Writer trainingWriter = SequenceFile.createWriter(fs, fs.getConf(), trainingOutputFile,
                iterator.getKeyClass(), iterator.getValueClass());
        SequenceFile.Writer testWriter = SequenceFile.createWriter(fs, fs.getConf(), testOutputFile,
                iterator.getKeyClass(), iterator.getValueClass());
        try {
            int pos = 0;
            while (iterator.hasNext()) {
                pos++;

                SequenceFile.Writer writer;
                if (testRandomSelectionPct > 0) { // Randomly choose
                    writer = randomSel.get(pos) ? testWriter : trainingWriter;
                } else { // Choose based on location
                    writer = pos > testSplitStart ? testWriter : trainingWriter;
                }

                if (writer == testWriter) {
                    if (testCount >= testSplitSize) {
                        writer = trainingWriter;
                    } else {
                        testCount++;
                    }
                }
                if (writer == trainingWriter) {
                    trainCount++;
                }

                Pair<Writable, Writable> pair = iterator.next();
                writer.append(pair.getFirst(), pair.getSecond());
            }
        } finally {
            Closeables.close(iterator, true);
            Closeables.close(trainingWriter, false);
            Closeables.close(testWriter, false);
        }
    }
    log.info("file: {}, input: {} train: {}, test: {} starting at {}", inputFile.getName(), lineCount,
            trainCount, testCount, testSplitStart);

    if (callback != null) {
        callback.splitComplete(inputFile, lineCount, trainCount, testCount, testSplitStart);
    }
}
From source file:com.netflix.aegisthus.tools.StorageHelper.java
License:Apache License
public void logCommit(String file) throws IOException {
    Path log = commitPath(getTaskId());
    if (debug) {
        LOG.info(String.format("logging (%s) to commit log (%s)", file, log.toUri().toString()));
    }
    FileSystem fs = log.getFileSystem(config);
    DataOutputStream os = null;
    if (fs.exists(log)) {
        os = fs.append(log);
    } else {
        os = fs.create(log);
    }
    os.writeBytes(file);
    os.write('\n');
    os.close();
}
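One caveat on the append-or-create pattern above (an observation, not part of the original source): not every FileSystem implementation supports append(); object-store file systems in particular may throw an exception for it. A hedged sketch of a guard, where the fallback behaviour would have to be decided per application:

// Hypothetical guard: fall back to create() if this FileSystem does not support append().
DataOutputStream os;
try {
    os = fs.exists(log) ? fs.append(log) : fs.create(log);
} catch (UnsupportedOperationException e) {
    // Recreating the file discards earlier entries; a real fallback would re-read or buffer them.
    os = fs.create(log, true);
}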
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
private static Path writeOutputFile(TaskAttemptID id, Path dest, String content, long copies) throws IOException {
    String fileName = ((id.getTaskType() == TaskType.REDUCE) ? "r_" : "m_") + id.getTaskID().getId() + "_"
            + id.getId() + "_" + UUID.randomUUID().toString();
    Path outPath = new Path(dest, fileName);
    FileSystem fs = outPath.getFileSystem(getConfiguration());
    try (OutputStream out = fs.create(outPath)) {
        byte[] bytes = content.getBytes(StandardCharsets.UTF_8);
        for (int i = 0; i < copies; i += 1) {
            out.write(bytes);
        }
    }
    return outPath;
}
From source file:com.netflix.bdp.s3.TestS3PartitionedFileListing.java
License:Apache License
@Test
public void testTaskOutputListing() throws Exception {
    S3PartitionedOutputCommitter committer = newTaskCommitter();

    // create files in the attempt path that should be found by getTaskOutput
    Path attemptPath = committer.getTaskAttemptPath(getTAC());
    FileSystem attemptFS = attemptPath.getFileSystem(getTAC().getConfiguration());
    attemptFS.delete(attemptPath, true);

    List<String> expectedFiles = Lists.newArrayList();
    for (String dateint : Arrays.asList("20161115", "20161116")) {
        for (String hour : Arrays.asList("13", "14")) {
            String relative = "dateint=" + dateint + "/hour=" + hour + "/" + UUID.randomUUID().toString()
                    + ".parquet";
            expectedFiles.add(relative);
            attemptFS.create(new Path(attemptPath, relative)).close();
        }
    }

    List<FileStatus> attemptFiles = committer.getTaskOutput(getTAC());
    List<String> actualFiles = Lists.newArrayList();
    for (FileStatus stat : attemptFiles) {
        String relative = getRelativePath(attemptPath, stat.getPath());
        actualFiles.add(relative);
    }

    Assert.assertEquals("File sets should match", expectedFiles, actualFiles);

    attemptFS.delete(attemptPath, true);
}
From source file:com.netflix.bdp.s3.TestS3PartitionedFileListing.java
License:Apache License
@Test
public void testTaskOutputListingWithHiddenFiles() throws Exception {
    S3PartitionedOutputCommitter committer = newTaskCommitter();

    // create files in the attempt path that should be found by getTaskOutput
    Path attemptPath = committer.getTaskAttemptPath(getTAC());
    FileSystem attemptFS = attemptPath.getFileSystem(getTAC().getConfiguration());
    attemptFS.delete(attemptPath, true);

    List<String> expectedFiles = Lists.newArrayList();
    for (String dateint : Arrays.asList("20161115", "20161116")) {
        String metadata = "dateint=" + dateint + "/" + "_metadata";
        attemptFS.create(new Path(attemptPath, metadata)).close();

        for (String hour : Arrays.asList("13", "14")) {
            String relative = "dateint=" + dateint + "/hour=" + hour + "/" + UUID.randomUUID().toString()
                    + ".parquet";
            expectedFiles.add(relative);
            attemptFS.create(new Path(attemptPath, relative)).close();

            String partial = "dateint=" + dateint + "/hour=" + hour + "/." + UUID.randomUUID().toString()
                    + ".partial";
            attemptFS.create(new Path(attemptPath, partial)).close();
        }
    }

    List<FileStatus> attemptFiles = committer.getTaskOutput(getTAC());
    List<String> actualFiles = Lists.newArrayList();
    for (FileStatus stat : attemptFiles) {
        String relative = getRelativePath(attemptPath, stat.getPath());
        actualFiles.add(relative);
    }

    Assert.assertEquals("File sets should match", expectedFiles, actualFiles);

    attemptFS.delete(attemptPath, true);
}
From source file:com.netflix.bdp.s3.TestUtil.java
License:Apache License
public static void createTestOutputFiles(List<String> relativeFiles, Path attemptPath, Configuration conf)
        throws Exception {
    // create files in the attempt path that should be found by getTaskOutput
    FileSystem attemptFS = attemptPath.getFileSystem(conf);
    attemptFS.delete(attemptPath, true);
    for (String relative : relativeFiles) {
        // 0-length files are ignored, so write at least one byte
        OutputStream out = attemptFS.create(new Path(attemptPath, relative));
        out.write(34);
        out.close();
    }
}
From source file:com.netflix.bdp.s3mper.alert.impl.AlertJanitor.java
License:Apache License
/**
 * Writes out logs to the given path as a separate JSON message per line.
 *
 * @param queue
 * @param path
 * @throws IOException
 */
public void writeLogs(String queue, Path path) throws IOException {
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    DataOutputStream fout = fs.create(path);

    do {
        List<Message> messages = pull(queue, batchCount);

        if (messages.isEmpty()) {
            break;
        }

        for (Message m : messages) {
            fout.write((m.getBody().replaceAll("[\n|\r]", " ") + "\n").getBytes("UTF8"));
        }

        delete(queue, messages);
    } while (true);

    fout.close();
    fs.close();
}