List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
From source file:Importer.java
License:Open Source License
public static void copyFile(File file) throws Exception { // String TEST_PREFIX = ""; File destFile = new File(outDir, file.getName() + ".seq"); Path dest = new Path(destFile.getAbsolutePath()); Configuration conf = new Configuration(); FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")), conf);//from w w w. j a v a 2s .com CompressionCodec codec = new DefaultCodec(); fileSys.mkdirs(dest.getParent()); FSDataOutputStream outputStr = fileSys.create(dest); seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class, SequenceFile.CompressionType.BLOCK, codec); String filename = file.getName(); InputStream in = new BufferedInputStream(new FileInputStream(file)); if (filename.endsWith(".bz2")) { in.read(); in.read(); //snarf header in = new CBZip2InputStream(in); } BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII")); System.out.println("working on file " + file); int records = 0; long bytes = 0, bytes_since_status = 0; long startTime = System.currentTimeMillis(); String s = null; Text content = new Text(); while ((s = br.readLine()) != null) { if (s.startsWith("---END.OF.DOCUMENT---")) { Text name = new Text(hash(content)); seqFileWriter.append(name, content); records++; content = new Text(); } else { byte[] line_as_bytes = (s + " ").getBytes(); for (byte b : line_as_bytes) { assert b < 128 : "found an unexpected high-bit set"; } content.append(line_as_bytes, 0, line_as_bytes.length); bytes += line_as_bytes.length; /* bytes_since_status += line_as_bytes.length; if(bytes_since_status > 10 * 1024 * 1024) { //every 10 MB System.err.print('.'); bytes_since_status = 0; }*/ } } //end while if (content.getLength() > 5) { Text name = new Text(hash(content)); seqFileWriter.append(name, content); records++; } totalBytes += bytes; totalRecords += records; long time = (System.currentTimeMillis() - startTime) / 1000 + 1; long kbSec = bytes / 1024 / time; System.out.println(new java.util.Date()); System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in " + time + " seconds (" + kbSec + " KB/sec)."); in.close(); seqFileWriter.close(); outputStr.close(); }
From source file:Vectors.java
License:Apache License
public static void write(Vector vector, Path path, Configuration conf, boolean laxPrecision) throws IOException { FileSystem fs = FileSystem.get(path.toUri(), conf); FSDataOutputStream out = fs.create(path); try {//from w w w.ja va 2s . c o m VectorWritable vectorWritable = new VectorWritable(vector); vectorWritable.setWritesLaxPrecision(laxPrecision); vectorWritable.write(out); } finally { Closeables.closeQuietly(out); } }
From source file:acromusashi.stream.bolt.hdfs.HdfsStreamWriter.java
License:Open Source License
/** * ??HDFS??Open?/*from w ww . j a v a 2 s . c om*/ * * @param filePath HDFS * @param fs * @param isFileSyncEachTime ????????? * @throws IOException Open */ public void open(String filePath, FileSystem fs, boolean isFileSyncEachTime) throws IOException { Path dstPath = new Path(filePath); if (fs.exists(dstPath) == true) { this.delegateStream = fs.append(dstPath); } else { this.delegateStream = fs.create(dstPath); } this.isFileSyncEachTime = isFileSyncEachTime; }
From source file:alluxio.hadoop.FileSystemAclIntegrationTest.java
License:Apache License
private static void create(org.apache.hadoop.fs.FileSystem fs, Path path) throws IOException { FSDataOutputStream o = fs.create(path); o.writeBytes("Test Bytes"); o.close();// w ww . ja v a 2 s . c o m }
From source file:Assignment3_P2_MergeStockAverageCount.StockPriceMergeDriver.java
/** * @param args the command line arguments */// w ww . j a v a 2s. co m public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); // local file system handle FileSystem local = FileSystem.getLocal(conf); // hdfs file system handle FileSystem hdfs = FileSystem.get(conf); // local input directory Path inputDir = new Path(args[0]); // hdfs i/p directory Path inputDir1 = new Path(args[1]); // local input files in local dir FileStatus[] inputFiles = local.listStatus(inputDir); // o/p stream FSDataOutputStream out = hdfs.create(inputDir1); // open each file and extract contents of file for (int i = 0; i < inputFiles.length; i++) { System.out.println("File name ----------------------------------------------------------------> " + inputFiles[i].getPath().getName()); FSDataInputStream in = local.open(inputFiles[i].getPath()); byte buffer[] = new byte[256]; int bytesRead = 0; // extract all contents of file while ((bytesRead = in.read(buffer)) > 0) { out.write(buffer, 0, bytesRead); } // close input stream in.close(); } Job job = Job.getInstance(conf, "Average Stock Price"); job.setJarByClass(StockPriceMergeDriver.class); job.setMapperClass(StockPriceMerge_Mapper.class); job.setCombinerClass(StockPriceMerge_Reducer.class); job.setReducerClass(StockPriceMerge_Reducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FloatWritable.class); FileInputFormat.addInputPath(job, new Path(args[1])); // above programs output will be input for mapper FileOutputFormat.setOutputPath(job, new Path(args[2])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
@Override public void bsp(BSPPeer<IntWritable, NullWritable, IntWritable, NullWritable, NullWritable> peer) throws IOException, SyncException, InterruptedException { BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration()); FileSystem fs = FileSystem.get(peer.getConfiguration()); FSDataOutputStream outStream = fs .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log")); outStream.writeChars("HelloHybrid.bsp executed on CPU!\n"); ArrayList<Integer> summation = new ArrayList<Integer>(); // test input IntWritable key = new IntWritable(); NullWritable nullValue = NullWritable.get(); while (peer.readNext(key, nullValue)) { outStream.writeChars("input: key: '" + key.get() + "'\n"); summation.add(key.get());//from ww w .j a v a2 s. c o m } // test sequenceFileReader Path example = new Path(peer.getConfiguration().get(CONF_EXAMPLE_PATH)); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, example, peer.getConfiguration()); int i = 0; while (reader.next(key, nullValue)) { outStream.writeChars("sequenceFileReader: key: '" + key.get() + "'\n"); if (i < summation.size()) { summation.set(i, summation.get(i) + key.get()); } i++; } } catch (IOException e) { throw new RuntimeException(e); } finally { if (reader != null) { reader.close(); } } // test output for (Integer i : summation) { key.set(i); outStream.writeChars("output: key: '" + key.get() + "'\n"); peer.write(key, nullValue); } // test getAllPeerNames outStream.writeChars("getAllPeerNames: '" + Arrays.toString(peer.getAllPeerNames()) + "'\n"); // test String.split String splitString = "boo:and:foo"; String[] splits; outStream.writeChars("splitString: '" + splitString + "'\n"); splits = splitString.split(":"); outStream.writeChars("split(\":\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n"); splits = splitString.split(":", 2); outStream.writeChars( "split(\":\",2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n"); splits = splitString.split(":", 5); outStream.writeChars( "split(\":\",5) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n"); splits = splitString.split(":", -2); outStream.writeChars( "split(\":\",-2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n"); splits = splitString.split(";"); outStream.writeChars("split(\";\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n"); outStream.close(); }
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
@Override public void bspGpu(BSPPeer<IntWritable, NullWritable, IntWritable, NullWritable, NullWritable> peer, Rootbeer rootbeer) throws IOException, SyncException, InterruptedException { BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration()); FileSystem fs = FileSystem.get(peer.getConfiguration()); FSDataOutputStream outStream = fs .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log")); outStream.writeChars("HelloHybrid.bspGpu executed on GPU!\n"); HelloHybridKernel kernel = new HelloHybridKernel(peer.getConfiguration().get(CONF_EXAMPLE_PATH), CONF_N, "boo:and:foo", ":"); // Run GPU Kernels Context context = rootbeer.createDefaultContext(); Stopwatch watch = new Stopwatch(); watch.start();//from ww w . j a v a 2s . c om // 1 Kernel within 1 Block rootbeer.run(kernel, new ThreadConfig(1, 1, 1), context); watch.stop(); List<StatsRow> stats = context.getStats(); for (StatsRow row : stats) { outStream.writeChars(" StatsRow:\n"); outStream.writeChars(" serial time: " + row.getSerializationTime() + "\n"); outStream.writeChars(" exec time: " + row.getExecutionTime() + "\n"); outStream.writeChars(" deserial time: " + row.getDeserializationTime() + "\n"); outStream.writeChars(" num blocks: " + row.getNumBlocks() + "\n"); outStream.writeChars(" num threads: " + row.getNumThreads() + "\n"); } outStream.writeChars("HelloHybridKernel,GPUTime=" + watch.elapsedTimeMillis() + "ms\n"); outStream.writeChars("HelloHybridKernel,peerName: '" + kernel.peerName + "'\n"); outStream.writeChars("HelloHybridKernel,numPeers: '" + kernel.numPeers + "'\n"); outStream.writeChars("HelloHybridKernel,summation: '" + Arrays.toString(kernel.summation) + "'\n"); outStream.writeChars("HelloHybridKernel,getAllPeerNames: '" + Arrays.toString(kernel.allPeerNames) + "'\n"); // test String.split outStream.writeChars("HelloHybridKernel,splitString: '" + kernel.splitString + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\") len: " + kernel.splits1.length + " values: '" + Arrays.toString(kernel.splits1) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",2) len: " + kernel.splits2.length + " values: '" + Arrays.toString(kernel.splits2) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",5) len: " + kernel.splits3.length + " values: '" + Arrays.toString(kernel.splits3) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",-2) len: " + kernel.splits4.length + " values: '" + Arrays.toString(kernel.splits4) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\";\") len: " + kernel.splits5.length + " values: '" + Arrays.toString(kernel.splits5) + "'\n"); outStream.close(); }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/********************************* CPU *********************************/ @Override/*from ww w.j a v a 2s . c o m*/ public void setup( BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer) throws IOException { this.m_conf = peer.getConfiguration(); this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false); this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false); this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1); // Init logging if (m_isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(m_conf); m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf)) + "/BSP_" + peer.getTaskId() + ".log")); } catch (IOException e) { e.printStackTrace(); } } long startTime = 0; if (m_timeMeasurement) { startTime = System.currentTimeMillis(); } // Init center vectors Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH)); FileSystem fs = FileSystem.get(m_conf); final ArrayList<DoubleVector> centers = new ArrayList<DoubleVector>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, centroids, m_conf); PipesVectorWritable key = new PipesVectorWritable(); NullWritable value = NullWritable.get(); while (reader.next(key, value)) { DoubleVector center = key.getVector(); centers.add(center); } } catch (IOException e) { throw new RuntimeException(e); } finally { if (reader != null) { reader.close(); } } Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!"); this.m_centers_cpu = centers.toArray(new DoubleVector[centers.size()]); long stopTime = 0; if (m_timeMeasurement) { stopTime = System.currentTimeMillis(); LOG.info("# setupTime: " + ((stopTime - startTime) / 1000.0) + " sec"); if (m_isDebuggingEnabled) { m_logger.writeChars("PiEstimatorHybrid,setupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n"); } } }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/********************************* GPU *********************************/ @Override/* w ww . ja va2 s. co m*/ public void setupGpu( BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer) throws IOException, SyncException, InterruptedException { this.m_conf = peer.getConfiguration(); this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false); this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false); this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1); this.m_blockSize = Integer.parseInt(this.m_conf.get(CONF_BLOCKSIZE)); this.m_gridSize = Integer.parseInt(this.m_conf.get(CONF_GRIDSIZE)); // Init logging if (m_isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(m_conf); m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf)) + "/BSP_" + peer.getTaskId() + ".log")); } catch (IOException e) { e.printStackTrace(); } } long startTime = 0; if (m_timeMeasurement) { startTime = System.currentTimeMillis(); } // Init center vectors Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH)); FileSystem fs = FileSystem.get(m_conf); final List<double[]> centers = new ArrayList<double[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, centroids, m_conf); PipesVectorWritable key = new PipesVectorWritable(); NullWritable value = NullWritable.get(); while (reader.next(key, value)) { centers.add(key.getVector().toArray()); } } catch (IOException e) { throw new RuntimeException(e); } finally { if (reader != null) { reader.close(); } } Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!"); // build centers_gpu double[][] this.m_centers_gpu = new double[centers.size()][centers.get(0).length]; for (int i = 0; i < centers.size(); i++) { double[] vector = centers.get(i); for (int j = 0; j < vector.length; j++) { this.m_centers_gpu[i][j] = vector[j]; } } long stopTime = 0; if (m_timeMeasurement) { stopTime = System.currentTimeMillis(); LOG.info("# setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec"); if (m_isDebuggingEnabled) { m_logger.writeChars( "PiEstimatorHybrid,setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n"); } } }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java
License:Apache License
/********************************* CPU *********************************/ @Override/*from ww w .j a va 2s . co m*/ public void setup( BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer) throws IOException { HamaConfiguration conf = peer.getConfiguration(); this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false); // Choose one as a master, who sorts the matrix rows at the end // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2); // TODO // task must be 0 otherwise write out does NOT work! this.m_masterTask = peer.getPeerName(0); // Init logging if (m_isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(conf); m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf)) + "/BSP_" + peer.getTaskId() + ".log")); } catch (IOException e) { e.printStackTrace(); } } // Load transposed Matrix B SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf); IntWritable bKey = new IntWritable(); PipesVectorWritable bVector = new PipesVectorWritable(); // for each col of matrix B (cause by transposed B) while (reader.next(bKey, bVector)) { m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector())); if (m_isDebuggingEnabled) { m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value=" + bVector.getVector().toString() + "\n"); } } reader.close(); }