List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
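A minimal sketch of this overload (not taken from any of the projects below), assuming a reachable HDFS or local file system and a hypothetical target path /tmp/example.txt; the returned FSDataOutputStream is closed via try-with-resources:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // hypothetical target path
        // create() opens the file for writing and returns a stream
        try (FSDataOutputStream out = fs.create(path)) {
            out.writeBytes("hello, hdfs\n");
        }
    }
}

This single-argument overload overwrites an existing file by default; the examples below show it combined with exists()/append() checks, compression codecs, and higher-level writers.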
From source file:ColumnStorage.ColumnProject.java
License:Open Source License
void saveNavigator(FileSystem fs, Path naviPath) throws IOException {
    int size = infos.size();
    if (size == 0) {
        return;
    }
    try {
        FSDataOutputStream out = fs.create(naviPath);
        out.writeInt(ConstVar.NaviMagic);
        out.writeShort((short) size);
        for (int i = 0; i < size; i++) {
            saveColumnInfo(out, infos.get(i));
        }
        out.close();
    } catch (IOException e) {
        LOG.error("save Column info fail:" + e.getMessage());
    }
}
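The stream above is closed only when every write succeeds. A minimal variant, as a sketch assuming the same infos field, ConstVar.NaviMagic constant, and saveColumnInfo helper, uses try-with-resources so the handle is released even if a write throws:

try (FSDataOutputStream out = fs.create(naviPath)) {
    out.writeInt(ConstVar.NaviMagic);
    out.writeShort((short) size);
    for (int i = 0; i < size; i++) {
        saveColumnInfo(out, infos.get(i));
    }
} catch (IOException e) {
    LOG.error("save Column info fail:" + e.getMessage());
}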
From source file:com.acme.io.JsonStorage.java
License:Apache License
/**
 * Store schema of the data being written
 *
 * @param schema Schema to be recorded
 * @param location Location as returned by
 *     {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain
 *     cluster properties through {@link Job#getConfiguration()} and not to
 *     set/query any runtime job information.
 * @throws IOException
 */
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
    // Store the schema in a side file in the same directory. MapReduce
    // does not include files starting with "_" when reading data for a job.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataOutputStream out = fs.create(new Path(location + "/_schema"));
    out.writeBytes(schema.toString());
    out.writeByte('\n');
    out.close();
}
From source file:com.adsame.samelogs.SameLogsSink.java
License:Apache License
@SuppressWarnings("rawtypes") @Override/*from w w w . j a va 2 s .com*/ public void append(Event e) throws IOException { // append the event to the output byte[] fn = e.get(TailSource.A_TAILSRCFILE); byte[] bd = e.getBody(); System.out.println("##" + new String(fn) + "##" + new String(bd)); Map<String, byte[]> maps = e.getAttrs(); Iterator iter = maps.entrySet().iterator(); while (iter.hasNext()) { Map.Entry entry = (Map.Entry) iter.next(); String key = (String) entry.getKey(); System.out.println("key: " + key); } // here we are assuming the body is a string pw.println(new String(e.getBody())); pw.flush(); // so we can see it in the file right away Configuration configuration = new Configuration(); FileSystem hdfsFileSystem = FileSystem.get(configuration); Path path = new Path("hdfs://nodie-Ubuntu4:9000/user/nodie/input/dfs/hello"); FSDataOutputStream out; System.out.println("exists: " + hdfsFileSystem.exists(path)); if (hdfsFileSystem.exists(path)) { out = hdfsFileSystem.append(path); } else { out = hdfsFileSystem.create(path); } out.write(e.getBody()); out.writeChar('\n'); out.flush(); out.close(); }
From source file:com.alectenharmsel.research.LcCounters.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: LineCounter <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "LineCount");
    job.setJarByClass(LineCount.class);
    job.setInputFormatClass(WholeBlockInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LineCountMapper.class);
    job.setReducerClass(LineCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    Configuration check = job.getConfiguration();
    boolean success = job.waitForCompletion(true);

    // Get the counter here, output to a file called total in the dir
    Counters counters = job.getCounters();

    // Throw it in the file
    Path outPath = new Path(args[1]);
    FileSystem fs = outPath.getFileSystem(check);
    OutputStream out = fs.create(new Path(outPath, "total"));

    String total = counters.findCounter(LcCounters.NUM_LINES).getValue() + "\n";
    out.write(total.getBytes());
    out.close();

    return success ? 0 : 1;
}
From source file:com.alexholmes.hadooputils.io.FileUtils.java
License:Apache License
/**
 * Writes the collection into a file as newline-separated lines.
 *
 * @param lines the lines to write to the file
 * @param fs a Hadoop file system
 * @param p the file path
 * @throws java.io.IOException if something goes wrong
 */
public static void writeLines(Collection<?> lines, final FileSystem fs, final Path p) throws IOException {
    OutputStream stream = fs.create(p);
    try {
        IOUtils.writeLines(lines, IOUtils.LINE_SEPARATOR, stream);
    } finally {
        stream.close();
    }
}
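A usage sketch for the helper above, assuming a default Configuration; the path and lines are hypothetical:

FileSystem fs = FileSystem.get(new Configuration());
FileUtils.writeLines(Arrays.asList("first", "second", "third"), fs, new Path("/tmp/lines.txt"));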
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize
                + "$batchId#" + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        //
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }
}
From source file:com.alexholmes.json.mapreduce.ExampleJob.java
License:Apache License
/**
 * Writes the contents of {@link #JSON} into a file in the job input directory in HDFS.
 *
 * @param conf the Hadoop config
 * @param inputDir the HDFS input directory where we'll write a file
 * @throws IOException if something goes wrong
 */
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        throw new IOException(
                String.format("Input directory '%s' exists - please remove and rerun this example", inputDir));
    }

    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));
    writer.write(JSON);
    IOUtils.closeStream(writer);
}
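The writer above relies on the platform default charset; a sketch of the same write with an explicit UTF-8 encoding and try-with-resources (assuming the same JSON constant and an import of java.nio.charset.StandardCharsets):

try (OutputStreamWriter writer =
        new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")), StandardCharsets.UTF_8)) {
    writer.write(JSON);
}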
From source file:com.asakusafw.dag.runtime.internalio.HadoopInternalInputTaskInfoTest.java
License:Apache License
private static void put(FileSystem fs, Path path, String... values) throws IOException {
    try (ModelOutput<Text> out = InternalOutputHandler.create(fs.create(path), Text.class)) {
        Text buf = new Text();
        for (String value : values) {
            buf.set(value);
            out.write(buf);
        }
    }
}
From source file:com.asakusafw.directio.tools.DirectIoCommandTestRoot.java
License:Apache License
/**
 * Creates a new indoubt transaction.
 * @param executionId target execution id
 * @throws IOException if failed
 * @throws InterruptedException if interrupted
 */
protected void indoubt(String executionId) throws IOException, InterruptedException {
    Path txPath = HadoopDataSourceUtil.getTransactionInfoPath(conf, executionId);
    Path cmPath = HadoopDataSourceUtil.getCommitMarkPath(conf, executionId);
    FileSystem fs = txPath.getFileSystem(conf);
    fs.create(txPath).close();
    fs.create(cmPath).close();
    int index = 0;
    for (String path : repo.getContainerPaths()) {
        String id = repo.getRelatedId(path);
        DirectDataSource ds = repo.getRelatedDataSource(path);
        OutputTransactionContext txContext = HadoopDataSourceUtil.createContext(executionId, id);
        OutputAttemptContext aContext = new OutputAttemptContext(txContext.getTransactionId(),
                String.valueOf(index), txContext.getOutputId(), new Counter());

        ds.setupTransactionOutput(txContext);
        ds.setupAttemptOutput(aContext);
        try (ModelOutput<StringBuilder> output = ds.openOutput(aContext,
                SimpleDataDefinition.newInstance(StringBuilder.class, new MockFormat()), "", executionId,
                new Counter())) {
            output.write(new StringBuilder("Hello, world!"));
        }
        ds.commitAttemptOutput(aContext);
        ds.cleanupAttemptOutput(aContext);

        index++;
    }
}
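The fs.create(txPath).close() calls above use create() purely to produce empty marker files; a standalone sketch of that idiom (the marker path is hypothetical):

// create() returns a stream, and closing it immediately leaves a zero-length marker file
Path marker = new Path("/tmp/app/_COMMITTED"); // hypothetical marker path
fs.create(marker).close();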
From source file:com.asakusafw.m3bp.compiler.tester.externalio.TestIoTaskExecutor.java
License:Apache License
private <T extends Writable> void executeInput(String name, Class<T> dataType, List<Path> paths)
        throws IOException {
    Action<Object, Exception> action = inputs.get(name);
    Invariants.requireNonNull(action, () -> MessageFormat.format("missing input: {0}", name));
    Path path = new Path(paths.get(0).toString().replace('*', '_'));
    FileSystem fs = path.getFileSystem(configuration);
    try (ModelOutput<T> output = new TemporaryFileOutput<>(fs.create(path), dataType.getName(),
            OUTPUT_INIT_BUFFER_SIZE, OUTPUT_PAGE_SIZE)) {
        action.perform(output);
    } catch (Error | RuntimeException | IOException e) {
        throw e;
    } catch (Exception e) {
        throw new AssertionError(e);
    }
}