List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
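Before the collected examples, a minimal sketch of the pattern they all share: obtain a FileSystem from a Configuration, call create(Path) to get an FSDataOutputStream, write, and close. The class name CreateExample and the path /tmp/example.txt are illustrative assumptions, not taken from any of the sources below.

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CreateExample {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            // Resolve the FileSystem implementation (HDFS, local, ...) from the configuration.
            FileSystem fs = FileSystem.get(conf);
            Path path = new Path("/tmp/example.txt"); // hypothetical path, for illustration only
            // create() opens (and overwrites) the file and returns a stream to write to.
            try (FSDataOutputStream out = fs.create(path)) {
                out.write("hello hdfs".getBytes(StandardCharsets.UTF_8));
            }
        }
    }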
From source file:com.twitter.algebra.nmf.DistRndMatrixJob.java
License:Apache License
public static DistributedRowMatrix random(Configuration conf, int rows, int cols, Path tmpPath, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputPath = new Path(tmpPath, "null-in");
    Path outputPath = new Path(tmpPath, "Random-" + label + "-" + rows + "-" + cols);
    FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
    DistRndMatrixJob job = new DistRndMatrixJob();
    if (!fs.exists(inputPath)) {
        FSDataOutputStream inFile = fs.create(inputPath);
        inFile.write("NullValue".getBytes());
        inFile.close();
    }
    if (!fs.exists(outputPath)) {
        job.run(conf, inputPath, outputPath, rows, cols);
    } else {
        log.warn("----------- Skip already exists: " + outputPath);
    }
    DistributedRowMatrix distRes = new DistributedRowMatrix(outputPath, tmpPath, rows, cols);
    distRes.setConf(conf);
    return distRes;
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
/**
 * Write \n separated lines of text to HDFS as UTF-8.
 */
public static void writeLines(FileSystem fs, Path path, Iterable<String> lines) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(path);
    Preconditions.checkNotNull(lines);

    Writer stream = new BufferedWriter(new OutputStreamWriter(fs.create(path), "UTF-8"));
    try {
        for (String line : lines) {
            stream.write(line + "\n");
        }
    } finally {
        stream.close();
    }
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
/**
 * Copy the file to HDFS in a temporary path.
 * @param pigContext the pig context
 * @param conf the job conf
 * @param url the url to ship to hdfs
 * @return the location where it was shipped
 * @throws IOException
 */
private static Path shipToHDFS(PigContext pigContext, Configuration conf, URL url) throws IOException {
    String path = url.getPath();
    int slash = path.lastIndexOf("/");
    String suffix = slash == -1 ? path : path.substring(slash + 1);

    Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toUri().getPath(), suffix);
    FileSystem fs = dst.getFileSystem(conf);
    OutputStream os = fs.create(dst);
    try {
        IOUtils.copyBytes(url.openStream(), os, 4096, true);
    } finally {
        // IOUtils can not close both the input and the output properly in a finally
        // as we can get an exception in between opening the stream and calling the method
        os.close();
    }
    return dst;
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezLauncher.java
License:Apache License
private void createSuccessFile(Job job, POStore store) throws IOException {
    if (shouldMarkOutputDir(job)) {
        Path outputPath = new Path(store.getSFile().getFileName());
        FileSystem fs = outputPath.getFileSystem(job.getJobConf());
        if (fs.exists(outputPath)) {
            // create a file in the folder to mark it
            Path filePath = new Path(outputPath, SUCCEEDED_FILE_NAME);
            if (!fs.exists(filePath)) {
                fs.create(filePath).close();
            }
        }
    }
}
From source file:com.uber.hoodie.common.table.HoodieTableConfig.java
License:Apache License
/**
 * Initialize the hoodie meta directory and any necessary files inside the meta
 * (including the hoodie.properties).
 */
public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties)
        throws IOException {
    if (!fs.exists(metadataFolder)) {
        fs.mkdirs(metadataFolder);
    }
    Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
    try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
        if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
            throw new IllegalArgumentException(HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
        }
        if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
            properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
        }
        // compare string contents with equals() rather than reference equality (==)
        if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME).equals(HoodieTableType.MERGE_ON_READ.name())
                && !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
            properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
        }
        if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
            properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
        }
        properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
    }
}
From source file:com.vf.flume.sink.hdfs.HDFSDataStream.java
License:Apache License
protected void doOpen(Configuration conf, Path dstPath, FileSystem hdfs) throws IOException {
    if (useRawLocalFileSystem) {
        if (hdfs instanceof LocalFileSystem) {
            hdfs = ((LocalFileSystem) hdfs).getRaw();
        } else {
            logger.warn("useRawLocalFileSystem is set to true but file system "
                    + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
        }
    }

    boolean appending = false;
    // Append support is assumed here; this fork drops the upstream Flume check of
    // conf.getBoolean("hdfs.append.support", false) and always appends to existing files.
    if (hdfs.isFile(dstPath)) {
        outStream = hdfs.append(dstPath);
        appending = true;
    } else {
        outStream = hdfs.create(dstPath);
    }

    serializer = EventSerializerFactory.getInstance(serializerType, serializerContext, outStream);
    if (appending && !serializer.supportsReopen()) {
        outStream.close();
        serializer = null;
        throw new IOException("serializer (" + serializerType + ") does not support append");
    }

    // must call superclass to check for replication issues
    registerCurrentStream(outStream, hdfs, dstPath);

    if (appending) {
        serializer.afterReopen();
    } else {
        serializer.afterCreate();
    }
}
From source file:com.yahoo.glimmer.util.MergeSortTool.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(MergeSortTool.class.getName(),
            "Merges alpha numerically sorted text files on HDFS",
            new Parameter[] {
                    new FlaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'i',
                            INPUT_ARG, "input filenames glob eg. .../part-r-?????/sortedlines.text"),
                    new FlaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                            OUTPUT_ARG, "output filename"),
                    new FlaggedOption(COUNT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c',
                            COUNT_ARG,
                            "optionally create a file containing a count of the number of lines merged in text"), });

    JSAPResult jsapResult = jsap.parse(args);
    if (!jsapResult.success()) {
        System.err.print(jsap.getUsage());
        System.exit(1);
    }

    // Maybe quicker to use a MR job with one reducer.. Currently
    // decompression, merge and compression are all done in this thread..

    Path inputGlobPath = new Path(jsapResult.getString(INPUT_ARG));

    Configuration config = getConf();
    FileSystem fs = FileSystem.get(config);

    FileStatus[] sources = fs.globStatus(inputGlobPath);
    if (sources.length == 0) {
        System.err.println("No files matching input glob:" + inputGlobPath.toString());
        return 1;
    }

    List<Path> sourcePaths = new ArrayList<Path>(sources.length);
    for (FileStatus source : sources) {
        if (source.isDirectory()) {
            System.err.println(source.getPath().toString() + " is a directory.");
            return 1;
        }
        sourcePaths.add(source.getPath());
    }

    Path outputPath = new Path(jsapResult.getString(OUTPUT_ARG));

    CompressionCodecFactory factory = new CompressionCodecFactory(config);

    FSDataOutputStream countsOutputStream = null;
    if (jsapResult.contains(COUNT_ARG)) {
        Path countsPath = new Path(jsapResult.getString(COUNT_ARG));
        countsOutputStream = fs.create(countsPath);
    }

    int lineCount = MergeSortTool.mergeSort(fs, sourcePaths, outputPath, factory);
    System.out.println("Merged " + lineCount + " lines into " + outputPath.toString());

    if (countsOutputStream != null) {
        countsOutputStream.writeBytes("" + lineCount + '\n');
        // flush and close only when the counts file was requested,
        // otherwise these calls would throw a NullPointerException
        countsOutputStream.flush();
        countsOutputStream.close();
    }

    return 0;
}
From source file:com.yahoo.glimmer.util.MergeSortTool.java
License:Open Source License
public static int mergeSort(FileSystem fs, List<Path> sourcePaths, Path outputPath,
        CompressionCodecFactory compressionCodecFactory) throws IOException {
    assert sourcePaths.size() > 0 : "No source paths given.";
    LOG.info("Sorted merge into " + outputPath.toString());
    OutputStream outputStream = fs.create(outputPath);

    CompressionCodec inputCompressionCodec = compressionCodecFactory.getCodec(sourcePaths.get(0));
    if (inputCompressionCodec != null) {
        LOG.info("Input compression codec " + inputCompressionCodec.getClass().getName());
    }

    CompressionCodec outputCompressionCodec = compressionCodecFactory.getCodec(outputPath);
    if (outputCompressionCodec != null) {
        LOG.info("Output compression codec " + outputCompressionCodec.getClass().getName());
        outputStream = outputCompressionCodec.createOutputStream(outputStream);
    }

    List<BufferedReader> readers = new ArrayList<BufferedReader>();
    OutputStreamWriter writer = new OutputStreamWriter(outputStream);

    for (Path partPath : sourcePaths) {
        LOG.info("\tAdding source " + partPath.toString());
        InputStream inputStream = fs.open(partPath);
        if (inputCompressionCodec != null) {
            inputStream = inputCompressionCodec.createInputStream(inputStream);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        readers.add(reader);
    }

    int count = ReadersWriterMergeSort.mergeSort(readers, writer);

    writer.close();
    for (BufferedReader reader : readers) {
        reader.close();
    }
    readers.clear();

    LOG.info("Processed " + count + " lines into " + outputPath.toString());
    return count;
}
From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSourceTest.java
License:Apache License
private void writeSimpleFiles(String path, String ext, int numOfFiles) {
    // get filesystem
    FileSystem dfs;
    try {
        dfs = hdfsCluster.getFileSystem();
    } catch (IOException ioe) {
        fail("Could not access MiniDFSCluster" + ioe.getMessage());
        return;
    }

    // create basedir
    Path basedir = new Path(path);
    try {
        dfs.mkdirs(basedir);
    } catch (IOException ioe) {
        fail("Could not create DIR:" + path + "\n" + ioe.getMessage());
        return;
    }

    // write files
    for (int i = 1; i <= numOfFiles; i++) {
        String fn = null;
        if (ext != null) {
            fn = Integer.toString(i) + "." + ext;
        } else {
            fn = Integer.toString(i);
        }

        try {
            OutputStream fin = dfs.create(new Path(path, fn));
            BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(fin));
            wr.write(Integer.toString(i));
            wr.close();
            fin.close();
        } catch (IOException ioe) {
            fail("Fail writing to input file: " + fn + " in directory: " + path + ioe.getMessage());
        }
    }
}
From source file:com.yahoo.spaclu.data.extract.ExtractFeatureSpark.java
License:Apache License
/**
 * @deprecated
 */
public static boolean writeToHDFS(Object object, String fileName) {
    // Create a default hadoop configuration
    Configuration conf = new Configuration();
    // Specifies a new file in HDFS.
    Path filenamePath = new Path(fileName);

    try {
        // Parse created config to the HDFS
        FileSystem fs = FileSystem.get(conf);

        // fail if the file already exists
        if (fs.exists(filenamePath)) {
            throw new IOException("File already exists: " + filenamePath);
        }

        FSDataOutputStream fos = fs.create(filenamePath);
        ObjectOutputStream oos = new ObjectOutputStream(fos);
        oos.writeObject(object);

        // close the object stream first so its buffer is flushed
        // before the underlying HDFS stream is closed
        oos.close();
        fos.close();

        return true;
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return false;
    }
}