List of usage examples for the deprecated method org.apache.hadoop.fs.FileSystem.getLength(Path)
@Deprecated public long getLength(Path f) throws IOException
From source file:boa.functions.BoaIntrinsics.java
License:Apache License
/** * Given the model URL, deserialize the model and return Model type * * @param Take URL for the model//w ww.j a v a2 s. c om * @return Model type after deserializing */ // TODO Take complete URL and then deserialize the model // FIXME Returning Object as a type, this needs to be changed once we defined Model Type @FunctionSpec(name = "load", returnType = "Model", formalParameters = { "string" }) public static Object load(final String URL) throws Exception { Object unserializedObject = null; FSDataInputStream in = null; try { final Configuration conf = new Configuration(); final FileSystem fileSystem = FileSystem.get(conf); final Path path = new Path("hdfs://boa-njt" + URL); if (in != null) try { in.close(); } catch (final Exception e) { e.printStackTrace(); } in = fileSystem.open(path); int numBytes = 0; final byte[] b = new byte[(int) fileSystem.getLength(path) + 1]; long length = 0; in.read(b); ByteArrayInputStream bin = new ByteArrayInputStream(b); ObjectInputStream dataIn = new ObjectInputStream(bin); unserializedObject = dataIn.readObject(); dataIn.close(); } catch (Exception ex) { } return unserializedObject; }
From source file:com.asiainfo.srd.HioBench.java
License:Apache License
public static void main(String[] args) throws Exception { options = new Options(); final Configuration conf = new Configuration(); if (options.dumpConf) { Configuration.dumpConfiguration(conf, new PrintWriter(System.out)); }//from w w w . j a v a 2 s . com final FileSystem fs = FileSystem.get(new URI(options.hdfsUri), conf); if (!fs.exists(options.filePath)) { System.out.println("no file at " + options.filePath + "; writing " + "new file now with length " + options.nGigsInFile + " gigs..."); writeFile(fs); System.out.println("done."); } else if (fs.getLength(options.filePath) != options.nBytesInFile) { System.out.println("existing file " + options.filename + " has length " + fs.getLength(options.filePath) + ", but we wanted length " + options.nBytesInFile + ". Re-creating."); writeFile(fs); System.out.println("done."); } else { System.out.println( "using existing file at " + options.filePath + " of length " + options.nGigsInFile + " gigs."); } long nanoStart = System.nanoTime(); WorkerThread threads[] = new WorkerThread[options.nThreads]; for (int i = 0; i < options.nThreads; i++) { threads[i] = new WorkerThread(i == 0, fs, WorkerThread.createBenchReader(options, i)); } for (int i = 0; i < options.nThreads; i++) { threads[i].start(); } for (int i = 0; i < options.nThreads; i++) { threads[i].join(); } for (int i = 0; i < options.nThreads; i++) { Throwable t = threads[i].getException(); if (t != null) { System.err.println("there were exceptions. Aborting."); System.exit(1); } } long nanoEnd = System.nanoTime(); fs.close(); long totalIo = options.nThreads; totalIo *= options.nBytesToRead; float nanoDiff = nanoEnd - nanoStart; float seconds = nanoDiff / 1000000000; System.out.println(String.format("Using %d threads, read %s in %f seconds", options.nThreads, prettyPrintByteSize(totalIo), seconds)); float rate = totalIo / seconds; System.out.println("Average rate was " + prettyPrintByteSize(rate) + "/s"); }
From source file:com.cloudera.HioBench.java
License:Apache License
public static void main(String[] args) throws Exception { options = new Options(); final Configuration conf = new Configuration(); if (options.dumpConf) { Configuration.dumpConfiguration(conf, new PrintWriter(System.out)); }//from w ww .j a v a 2 s . c o m final FileSystem fs = FileSystem.get(new URI(options.hdfsUri), conf); fs.setVerifyChecksum(!options.skipChecksum); if (!fs.exists(options.filePath)) { System.out.println("no file at " + options.filePath + "; writing " + "new file now with length " + options.nGigsInFile + " gigs..."); writeFile(fs); System.out.println("done."); } else if (fs.getLength(options.filePath) != options.nBytesInFile) { System.out.println("existing file " + options.filename + " has length " + fs.getLength(options.filePath) + ", but we wanted length " + options.nBytesInFile + ". Re-creating."); writeFile(fs); System.out.println("done."); } else { System.out.println( "using existing file at " + options.filePath + " of length " + options.nGigsInFile + " gigs."); } long nanoStart = System.nanoTime(); WorkerThread threads[] = new WorkerThread[options.nThreads]; for (int i = 0; i < options.nThreads; i++) { threads[i] = new WorkerThread(i == 0, fs, WorkerThread.createBenchReader(options, i)); } for (int i = 0; i < options.nThreads; i++) { threads[i].start(); } for (int i = 0; i < options.nThreads; i++) { threads[i].join(); } for (int i = 0; i < options.nThreads; i++) { Throwable t = threads[i].getException(); if (t != null) { System.err.println("there were exceptions. Aborting."); System.exit(1); } } long nanoEnd = System.nanoTime(); fs.close(); long totalIo = options.nThreads; totalIo *= options.nBytesToRead; float nanoDiff = nanoEnd - nanoStart; float seconds = nanoDiff / 1000000000; System.out.println(String.format("Using %d threads, read %s in %f seconds", options.nThreads, prettyPrintByteSize(totalIo), seconds)); float rate = totalIo / seconds; System.out.println("Average rate was " + prettyPrintByteSize(rate) + "/s"); }
From source file:com.qubole.rubix.core.CachingInputStream.java
License:Apache License
public CachingInputStream(FSDataInputStream parentInputStream, FileSystem parentFs, Path backendPath, Configuration conf, CachingFileSystemStats statsMbean, long splitSize, ClusterType clusterType) throws IOException { this.remotePath = backendPath.toString(); this.fileSize = parentFs.getLength(backendPath); lastModified = parentFs.getFileStatus(backendPath).getModificationTime(); initialize(parentInputStream, conf); this.statsMbean = statsMbean; this.splitSize = splitSize; this.clusterType = clusterType; }
From source file:com.ripariandata.timberwolf.writer.hive.SequenceFileMailWriterTest.java
License:Apache License
/**
 * Builds a mocked {@code FileSystem} that serves the given bytes for a single path.
 *
 * <p>Opening {@code path} (with any buffer size) yields a stream over {@code data}, and the
 * deprecated {@code getLength} reports {@code data.length} for that path.
 *
 * @param path the only path the mock is stubbed for
 * @param data content served for {@code path}
 * @return the stubbed file system mock
 * @throws IOException declared because the stubbed methods declare it
 */
@SuppressWarnings("deprecation")
private FileSystem mockFileSystem(final String path, final byte[] data) throws IOException {
    final Path target = new Path(path);
    final long size = data.length;
    final FSDataInputStream content = new FSDataInputStream(
            new SeekablePositionedReadableByteArrayInputStream(data));

    final FileSystem mocked = mock(FileSystem.class);
    when(mocked.open(eq(target), any(int.class))).thenReturn(content);
    when(mocked.getLength(eq(target))).thenReturn(size);
    return mocked;
}
From source file:org.apache.hoya.tools.ConfigHelper.java
License:Apache License
/** * This will load and parse a configuration to an XML document * @param fs filesystem//from w w w . j a v a 2 s. c o m * @param path path * @return an XML document * @throws IOException IO failure */ public Document parseConfiguration(FileSystem fs, Path path) throws IOException { int len = (int) fs.getLength(path); byte[] data = new byte[len]; FSDataInputStream in = fs.open(path); try { in.readFully(0, data); } catch (IOException e) { in.close(); } ByteArrayInputStream in2; //this is here to track down a parse issue //related to configurations String s = new String(data, 0, len); log.debug("XML resource {} is \"{}\"", path, s); in2 = new ByteArrayInputStream(data); try { Document document = parseConfigXML(in); return document; } catch (ParserConfigurationException e) { throw new IOException(e); } catch (SAXException e) { throw new IOException(e); } finally { in2.close(); } }
From source file:org.apache.hoya.tools.ConfigHelper.java
License:Apache License
/** * Load a configuration from ANY FS path. The normal Configuration * loader only works with file:// URIs// w w w . j a va2 s . c o m * @param fs filesystem * @param path path * @return a loaded resource * @throws IOException */ public static Configuration loadConfiguration(FileSystem fs, Path path) throws IOException { int len = (int) fs.getLength(path); byte[] data = new byte[len]; FSDataInputStream in = fs.open(path); try { in.readFully(0, data); } catch (IOException e) { in.close(); } ByteArrayInputStream in2; in2 = new ByteArrayInputStream(data); Configuration conf1 = new Configuration(false); conf1.addResource(in2); //now clone it while dropping all its sources Configuration conf2 = new Configuration(false); String src = path.toString(); for (Map.Entry<String, String> entry : conf1) { String key = entry.getKey(); String value = entry.getValue(); conf2.set(key, value, src); } return conf2; }
From source file:org.apache.nutch.admin.management.FileUtil.java
License:Apache License
public static long size(Path folder, Configuration configuration) throws IOException { FileSystem fileSystem = FileSystem.get(configuration); // Path[] files = fileSystem.listPaths(folder); FileStatus[] filestatuses = fileSystem.listStatus(folder); int len = filestatuses.length; Path[] files = new Path[len]; for (int i = 0; i < len; i++) { files[i] = filestatuses[i].getPath(); }/*from w w w.j av a 2 s .c o m*/ long size = 0; for (int i = 0; files != null && i < files.length; i++) { Path file = files[i]; if (fileSystem.isDirectory(file)) { size = size + size(file, configuration); } size = size + fileSystem.getLength(file); } return size + fileSystem.getLength(folder); }
From source file:org.apache.slider.common.tools.ConfigHelper.java
License:Apache License
public static byte[] loadBytes(FileSystem fs, Path path) throws IOException { int len = (int) fs.getLength(path); byte[] data = new byte[len]; /* JDK7/*from ww w . j a v a 2 s .c o m*/ try(FSDataInputStream in = fs.open(path)) { in.readFully(0, data); } */ FSDataInputStream in = null; in = fs.open(path); try { in.readFully(0, data); } finally { IOUtils.closeStream(in); } return data; }
From source file:org.commoncrawl.service.pagerank.slave.PageRankUtils.java
License:Open Source License
/**
 * Distributes page-rank contributions from an outlinks file to per-node output streams.
 *
 * <p>Producer: a loader thread opens {@code outlinksFile} as a SequenceFile, on
 * {@code CrawlEnvironment.getDefaultFileSystem()} when {@code outlinksIsRemote} is true and on
 * the local file system otherwise. It decodes each record into an {@code OutlinkItem} and puts
 * it on a bounded queue (capacity 20000). On every 10000th item, if {@code progressCallback}
 * is non-null, it reports reader position divided by file length; a {@code true} return from
 * the callback cancels loading. The end of input is signalled by an item whose fingerprints
 * are null. Cancellation and read failures are signalled by an item carrying an IOException.
 *
 * <p>Consumer: the calling thread builds the output streams through
 * {@code buildDistributionOutputStreamVector}, then repeatedly drains the queue. For each item
 * it divides the source's page-rank value by {@code max(urlCount, 1)} and writes the result to
 * the stream selected by the target fingerprint's non-negative hash modulo {@code nodeCount}.
 * An error item is rethrown. After the end marker it joins the loader thread; the output
 * streams and the returned file system are closed in a finally block.
 *
 * <p>NOTE(review): {@code drainTo} returns immediately on an empty queue, so the consumer loop
 * polls rather than blocks. {@code iterationOutlinkCount} is never incremented, so the logged
 * outlink counts are always 0. InterruptedException is ignored in every place it is caught.
 *
 * @param valueMap source of the current page-rank value per source fingerprint
 * @param outlinksFile SequenceFile of outlink records to read
 * @param outlinksIsRemote selects the default file system (true) or the local one (false)
 * @param localOutputDir passed to {@code buildDistributionOutputStreamVector}
 * @param remoteOutputDir passed to {@code buildDistributionOutputStreamVector}
 * @param thisNodeIdx index of this node; used for the outlinks base name and stream set-up
 * @param nodeCount number of nodes; modulus for choosing the output stream
 * @param iterationNumber iteration number; used for the outlinks base name
 * @param progressCallback optional progress/cancel callback, may be null
 * @throws IOException on read/write failure or cancellation; logged and rethrown
 */
public static void distributeRank(final PRValueMap valueMap, final Path outlinksFile, final boolean outlinksIsRemote, File localOutputDir, String remoteOutputDir, int thisNodeIdx, int nodeCount, int iterationNumber, final ProgressAndCancelCheckCallback progressCallback) throws IOException { final Configuration conf = CrawlEnvironment.getHadoopConfig(); Vector<PRValueOutputStream> outputStreamVector = new Vector<PRValueOutputStream>(); // allocate a queue ... final LinkedBlockingQueue<OutlinkItem> queue = new LinkedBlockingQueue<OutlinkItem>(20000); try { // start the loader thread ... Thread loaderThread = new Thread(new Runnable() { final BytesWritable key = new BytesWritable(); final BytesWritable value = new BytesWritable(); final DataInputBuffer keyStream = new DataInputBuffer(); final DataInputBuffer valueStream = new DataInputBuffer(); @Override public void run() { LOG.info("Opening Outlinks File at:" + outlinksFile); SequenceFile.Reader reader = null; try { FileSystem fsForOutlinksFile = null; if (outlinksIsRemote) { fsForOutlinksFile = CrawlEnvironment.getDefaultFileSystem(); } else { fsForOutlinksFile = FileSystem.getLocal(conf); } long bytesToReadTotal = fsForOutlinksFile.getLength(outlinksFile); reader = new SequenceFile.Reader(fsForOutlinksFile, outlinksFile, conf); OutlinkItem item = new OutlinkItem(); int itemCount = 0; boolean isCancelled = false; while (!isCancelled && reader.next(key, value)) { keyStream.reset(key.get(), 0, key.getLength()); valueStream.reset(value.get(), 0, value.getLength()); //populate item from data readURLFPFromStream(keyStream, item.targetFingerprint); item.urlCount = readURLFPAndCountFromStream(valueStream, item.sourceFingerprint); try { long blockTimeStart = System.currentTimeMillis(); queue.put(item); long blockTimeEnd = System.currentTimeMillis(); } catch (InterruptedException e) { } item = new OutlinkItem(); if (itemCount++ % 10000 == 0 && progressCallback != null) { float percentComplete = 
(float) reader.getPosition() / (float) bytesToReadTotal; if (progressCallback.updateProgress(percentComplete)) { LOG.info("Cancel check callback returned true.Cancelling outlink item load"); isCancelled = true; } } } item.sourceFingerprint = null; item.targetFingerprint = null; // add empty item try { if (!isCancelled) { queue.put(item); } else { queue.put(new OutlinkItem(new IOException("Operation Cancelled"))); } } catch (InterruptedException e) { } } catch (IOException e) { // add error item to queue. try { queue.put(new OutlinkItem(e)); } catch (InterruptedException e1) { } } finally { if (reader != null) try { reader.close(); } catch (IOException e) { } } } }); loaderThread.start(); // first things first ... initialize output stream vector FileSystem fileSystem = buildDistributionOutputStreamVector(true, getOutlinksBaseName(thisNodeIdx, iterationNumber), localOutputDir, remoteOutputDir, thisNodeIdx, nodeCount, outputStreamVector); try { // open outlinks file . LOG.info("Iterating Items in Outlinks File and Writing Test Value"); int itemCount = 0; int totalOutlinkCount = 0; int iterationOutlinkCount = 0; long iterationStart = System.currentTimeMillis(); long timeStart = iterationStart; boolean done = false; ArrayList<OutlinkItem> items = new ArrayList<OutlinkItem>(); // start iterating outlinks while (!done) { //OutlinkItem item = null; //try { long waitTimeStart = System.currentTimeMillis(); queue.drainTo(items); long waitTimeEnd = System.currentTimeMillis(); //} catch (InterruptedException e) { //} for (OutlinkItem item : items) { if (item.error != null) { LOG.info( "Loader Thread Returned Error:" + CCStringUtils.stringifyException(item.error)); throw item.error; } else if (item.sourceFingerprint == null) { LOG.info("Loader Thread Indicated EOF via emtpy item"); done = true; } else { ++itemCount; /* LOG.info("SourceFP-DomainHash:" + item.sourceFingerprint.getDomainHash() + " URLHash:" + item.sourceFingerprint.getUrlHash() + " PartitionIdx:" + 
((item.sourceFingerprint.hashCode() & Integer.MAX_VALUE) % CrawlEnvironment.PR_NUMSLAVES) ); */ // now get pr value for fingerprint (random seek in memory here!!!) float prValue = valueMap.getPRValue(item.sourceFingerprint) / (float) Math.max(item.urlCount, 1); // write value out int nodeIndex = (item.targetFingerprint.hashCode() & Integer.MAX_VALUE) % nodeCount; outputStreamVector.get(nodeIndex).writePRValue(item.targetFingerprint, item.sourceFingerprint, prValue); if (itemCount % 10000 == 0) { long timeEnd = System.currentTimeMillis(); int milliseconds = (int) (timeEnd - iterationStart); LOG.info("Distribute PR for 10000 Items with:" + iterationOutlinkCount + " Outlinks Took:" + milliseconds + " Milliseconds" + " QueueCount:" + queue.size()); iterationStart = System.currentTimeMillis(); totalOutlinkCount += iterationOutlinkCount; iterationOutlinkCount = 0; } } } items.clear(); } totalOutlinkCount += iterationOutlinkCount; LOG.info("Distribute Finished for a total of:" + itemCount + " Items with:" + totalOutlinkCount + " Outlinks Took:" + (System.currentTimeMillis() - timeStart) + " Milliseconds"); LOG.info("Waiting for Loader Thread to Die"); try { loaderThread.join(); } catch (InterruptedException e) { } LOG.info("Loader Thread Died - Moving on..."); } finally { for (PRValueOutputStream info : outputStreamVector) { if (info != null) { info.close(false); } } if (fileSystem != null) { fileSystem.close(); } } } catch (IOException e) { LOG.error("Exception caught while distributing outlinks:" + CCStringUtils.stringifyException(e)); throw e; } }