Example usage for org.apache.hadoop.fs FileSystem getLength

List of usage examples for org.apache.hadoop.fs FileSystem getLength

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getLength.

Prototype

@Deprecated
public long getLength(Path f) throws IOException 

Source Link

Document

Returns the number of bytes in a file. The method is deprecated; use getFileStatus(f).getLen() instead.

Usage

From source file:boa.functions.BoaIntrinsics.java

License:Apache License

/**
 * Given the model URL, deserialize the model and return it.
 *
 * <p>The URL is appended to the fixed {@code hdfs://boa-njt} prefix, the file is read completely
 * into memory and then decoded with Java object serialization. Loading is best effort: any
 * failure is printed to standard error and {@code null} is returned.
 *
 * @param URL path of the serialized model, relative to {@code hdfs://boa-njt}
 * @return the deserialized model, or {@code null} if it could not be loaded
 * @throws Exception declared for backward compatibility; failures are reported and
 *         turned into a {@code null} result rather than propagated
 */
// TODO Take complete URL and then deserialize the model
// FIXME Returning Object as a type, this needs to be changed once we defined Model Type
@FunctionSpec(name = "load", returnType = "Model", formalParameters = { "string" })
public static Object load(final String URL) throws Exception {
    FSDataInputStream in = null;
    ObjectInputStream dataIn = null;
    try {
        final Configuration conf = new Configuration();
        final FileSystem fileSystem = FileSystem.get(conf);
        final Path path = new Path("hdfs://boa-njt" + URL);

        // size the buffer to exactly the file length so readFully() can fill it
        final byte[] b = new byte[(int) fileSystem.getLength(path)];

        in = fileSystem.open(path);
        // a single read() may return before the buffer is full; readFully() keeps reading
        in.readFully(b);

        // NOTE(review): Java-native deserialization must only be used on trusted model files
        dataIn = new ObjectInputStream(new ByteArrayInputStream(b));
        return dataIn.readObject();
    } catch (final Exception ex) {
        // keep the historical contract (null on failure) but do not hide the cause
        ex.printStackTrace();
        return null;
    } finally {
        if (dataIn != null) {
            try {
                dataIn.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.asiainfo.srd.HioBench.java

License:Apache License

/**
 * Runs the read benchmark: makes sure the benchmark file exists with the requested length
 * (re-creating it when it does not), starts {@code options.nThreads} worker threads, waits
 * for them to finish and prints the elapsed time and average read rate.
 *
 * <p>If any worker reports an exception, every reported exception is printed and the
 * process exits with status 1.
 *
 * @param args command line arguments (not read by this method)
 * @throws Exception on any filesystem or thread failure
 */
public static void main(String[] args) throws Exception {
    options = new Options();
    final Configuration conf = new Configuration();
    if (options.dumpConf) {
        Configuration.dumpConfiguration(conf, new PrintWriter(System.out));
    }
    final FileSystem fs = FileSystem.get(new URI(options.hdfsUri), conf);

    if (!fs.exists(options.filePath)) {
        System.out.println("no file at " + options.filePath + "; writing " + "new file now with length "
                + options.nGigsInFile + " gigs...");
        writeFile(fs);
        System.out.println("done.");
    } else {
        // query the length once so the comparison and the message use the same value
        final long existingLength = fs.getLength(options.filePath);
        if (existingLength != options.nBytesInFile) {
            System.out.println("existing file " + options.filename + " has length " + existingLength
                    + ", but we wanted length " + options.nBytesInFile + ".  Re-creating.");
            writeFile(fs);
            System.out.println("done.");
        } else {
            System.out.println(
                    "using existing file at " + options.filePath + " of length " + options.nGigsInFile + " gigs.");
        }
    }

    long nanoStart = System.nanoTime();
    WorkerThread threads[] = new WorkerThread[options.nThreads];
    for (int i = 0; i < options.nThreads; i++) {
        threads[i] = new WorkerThread(i == 0, fs, WorkerThread.createBenchReader(options, i));
    }
    for (int i = 0; i < options.nThreads; i++) {
        threads[i].start();
    }
    for (int i = 0; i < options.nThreads; i++) {
        threads[i].join();
    }

    // report every worker failure before aborting so the cause is not lost
    boolean failed = false;
    for (int i = 0; i < options.nThreads; i++) {
        Throwable t = threads[i].getException();
        if (t != null) {
            System.err.println("worker thread " + i + " failed:");
            t.printStackTrace();
            failed = true;
        }
    }
    if (failed) {
        System.err.println("there were exceptions.  Aborting.");
        System.exit(1);
    }

    long nanoEnd = System.nanoTime();
    fs.close();
    long totalIo = options.nThreads;
    totalIo *= options.nBytesToRead;
    float nanoDiff = nanoEnd - nanoStart;
    float seconds = nanoDiff / 1000000000;
    System.out.println(String.format("Using %d threads, read %s in %f seconds", options.nThreads,
            prettyPrintByteSize(totalIo), seconds));
    float rate = totalIo / seconds;
    System.out.println("Average rate was " + prettyPrintByteSize(rate) + "/s");
}

From source file:com.cloudera.HioBench.java

License:Apache License

/**
 * Runs the read benchmark: makes sure the benchmark file exists with the requested length
 * (re-creating it when it does not), starts {@code options.nThreads} worker threads, waits
 * for them to finish and prints the elapsed time and average read rate.
 *
 * <p>Checksum verification on the filesystem is enabled unless {@code options.skipChecksum}
 * is set. If any worker reports an exception, every reported exception is printed and the
 * process exits with status 1.
 *
 * @param args command line arguments (not read by this method)
 * @throws Exception on any filesystem or thread failure
 */
public static void main(String[] args) throws Exception {
    options = new Options();
    final Configuration conf = new Configuration();
    if (options.dumpConf) {
        Configuration.dumpConfiguration(conf, new PrintWriter(System.out));
    }
    final FileSystem fs = FileSystem.get(new URI(options.hdfsUri), conf);
    fs.setVerifyChecksum(!options.skipChecksum);

    if (!fs.exists(options.filePath)) {
        System.out.println("no file at " + options.filePath + "; writing " + "new file now with length "
                + options.nGigsInFile + " gigs...");
        writeFile(fs);
        System.out.println("done.");
    } else {
        // query the length once so the comparison and the message use the same value
        final long existingLength = fs.getLength(options.filePath);
        if (existingLength != options.nBytesInFile) {
            System.out.println("existing file " + options.filename + " has length " + existingLength
                    + ", but we wanted length " + options.nBytesInFile + ".  Re-creating.");
            writeFile(fs);
            System.out.println("done.");
        } else {
            System.out.println(
                    "using existing file at " + options.filePath + " of length " + options.nGigsInFile + " gigs.");
        }
    }

    long nanoStart = System.nanoTime();
    WorkerThread threads[] = new WorkerThread[options.nThreads];
    for (int i = 0; i < options.nThreads; i++) {
        threads[i] = new WorkerThread(i == 0, fs, WorkerThread.createBenchReader(options, i));
    }
    for (int i = 0; i < options.nThreads; i++) {
        threads[i].start();
    }
    for (int i = 0; i < options.nThreads; i++) {
        threads[i].join();
    }

    // report every worker failure before aborting so the cause is not lost
    boolean failed = false;
    for (int i = 0; i < options.nThreads; i++) {
        Throwable t = threads[i].getException();
        if (t != null) {
            System.err.println("worker thread " + i + " failed:");
            t.printStackTrace();
            failed = true;
        }
    }
    if (failed) {
        System.err.println("there were exceptions.  Aborting.");
        System.exit(1);
    }

    long nanoEnd = System.nanoTime();
    fs.close();
    long totalIo = options.nThreads;
    totalIo *= options.nBytesToRead;
    float nanoDiff = nanoEnd - nanoStart;
    float seconds = nanoDiff / 1000000000;
    System.out.println(String.format("Using %d threads, read %s in %f seconds", options.nThreads,
            prettyPrintByteSize(totalIo), seconds));
    float rate = totalIo / seconds;
    System.out.println("Average rate was " + prettyPrintByteSize(rate) + "/s");
}

From source file:com.qubole.rubix.core.CachingInputStream.java

License:Apache License

public CachingInputStream(FSDataInputStream parentInputStream, FileSystem parentFs, Path backendPath,
        Configuration conf, CachingFileSystemStats statsMbean, long splitSize, ClusterType clusterType)
        throws IOException {
    this.remotePath = backendPath.toString();
    this.fileSize = parentFs.getLength(backendPath);
    lastModified = parentFs.getFileStatus(backendPath).getModificationTime();
    initialize(parentInputStream, conf);
    this.statsMbean = statsMbean;
    this.splitSize = splitSize;
    this.clusterType = clusterType;
}

From source file:com.ripariandata.timberwolf.writer.hive.SequenceFileMailWriterTest.java

License:Apache License

/**
 * Builds a mocked {@link FileSystem} that knows about exactly one file.
 *
 * <p>Opening {@code path} with any buffer size returns a stream over {@code data}, and
 * asking for the length of {@code path} returns {@code data.length}.
 *
 * @param path location of the single stubbed file
 * @param data contents served for that file
 * @return the stubbed filesystem mock
 * @throws IOException declared because the stubbed filesystem methods declare it
 */
@SuppressWarnings("deprecation")
private FileSystem mockFileSystem(final String path, final byte[] data) throws IOException {
    final Path location = new Path(path);
    final FSDataInputStream contents = new FSDataInputStream(
            new SeekablePositionedReadableByteArrayInputStream(data));
    final long size = data.length;

    final FileSystem stubbedFs = mock(FileSystem.class);
    when(stubbedFs.open(eq(location), any(int.class))).thenReturn(contents);
    when(stubbedFs.getLength(eq(location))).thenReturn(size);
    return stubbedFs;
}

From source file:org.apache.hoya.tools.ConfigHelper.java

License:Apache License

/**
 * This will load and parse a configuration to an XML document.
 *
 * <p>The file is read completely into memory, the filesystem stream is closed, and the
 * in-memory copy is what gets parsed.
 *
 * @param fs filesystem
 * @param path path
 * @return an XML document
 * @throws IOException IO failure while reading the file, or a parser failure wrapped
 *         in an {@code IOException}
 */
public Document parseConfiguration(FileSystem fs, Path path) throws IOException {
    int len = (int) fs.getLength(path);
    byte[] data = new byte[len];
    FSDataInputStream in = fs.open(path);
    try {
        in.readFully(0, data);
    } finally {
        // always release the filesystem stream; a read failure propagates to the caller
        // instead of letting a partially filled buffer be parsed
        in.close();
    }

    //this is here to track down a parse issue
    //related to configurations
    String s = new String(data, 0, len);
    log.debug("XML resource {} is \"{}\"", path, s);

    // parse the in-memory copy, not the (now closed) filesystem stream
    ByteArrayInputStream in2 = new ByteArrayInputStream(data);
    try {
        return parseConfigXML(in2);
    } catch (ParserConfigurationException e) {
        throw new IOException(e);
    } catch (SAXException e) {
        throw new IOException(e);
    } finally {
        in2.close();
    }
}

From source file:org.apache.hoya.tools.ConfigHelper.java

License:Apache License

/**
 * Load a configuration from ANY FS path. The normal Configuration
 * loader only works with file:// URIs.
 *
 * <p>The file is read completely into memory and loaded into a temporary configuration;
 * every entry is then copied into a fresh configuration with {@code path} recorded as
 * its source.
 *
 * @param fs filesystem
 * @param path path
 * @return a loaded resource
 * @throws IOException if the file cannot be read
 */
public static Configuration loadConfiguration(FileSystem fs, Path path) throws IOException {
    int len = (int) fs.getLength(path);
    byte[] data = new byte[len];
    FSDataInputStream in = fs.open(path);
    try {
        in.readFully(0, data);
    } finally {
        // always release the filesystem stream; a read failure propagates to the caller
        // instead of silently producing a configuration from a partially filled buffer
        in.close();
    }

    ByteArrayInputStream in2 = new ByteArrayInputStream(data);
    Configuration conf1 = new Configuration(false);
    conf1.addResource(in2);
    //now clone it while dropping all its sources
    Configuration conf2 = new Configuration(false);
    String src = path.toString();
    for (Map.Entry<String, String> entry : conf1) {
        String key = entry.getKey();
        String value = entry.getValue();
        conf2.set(key, value, src);
    }
    return conf2;
}

From source file:org.apache.nutch.admin.management.FileUtil.java

License:Apache License

/**
 * Computes the total length of {@code folder}: the length reported for the folder itself
 * plus the lengths of everything beneath it, recursing into subdirectories.
 *
 * @param folder path to measure
 * @param configuration configuration used to obtain the filesystem
 * @return the accumulated length in bytes
 * @throws IOException if the filesystem cannot be queried
 */
public static long size(Path folder, Configuration configuration) throws IOException {

    FileSystem fileSystem = FileSystem.get(configuration);
    FileStatus[] filestatuses = fileSystem.listStatus(folder);

    long size = 0;
    // guard against a null listing before dereferencing it
    if (filestatuses != null) {
        for (FileStatus status : filestatuses) {
            Path file = status.getPath();
            if (fileSystem.isDirectory(file)) {
                // the recursive call already adds the directory's own length,
                // so it must not be added a second time here
                size = size + size(file, configuration);
            } else {
                size = size + fileSystem.getLength(file);
            }
        }
    }
    return size + fileSystem.getLength(folder);
}

From source file:org.apache.slider.common.tools.ConfigHelper.java

License:Apache License

public static byte[] loadBytes(FileSystem fs, Path path) throws IOException {
    int len = (int) fs.getLength(path);
    byte[] data = new byte[len];
    /* JDK7/*from   ww w . j  a v a 2  s  .c o  m*/
    try(FSDataInputStream in = fs.open(path)) {
      in.readFully(0, data);
    }
    */
    FSDataInputStream in = null;
    in = fs.open(path);
    try {
        in.readFully(0, data);
    } finally {
        IOUtils.closeStream(in);
    }
    return data;
}

From source file:org.commoncrawl.service.pagerank.slave.PageRankUtils.java

License:Open Source License

/**
 * Reads outlink records from a sequence file and writes one page-rank contribution per
 * record to a per-node output stream.
 *
 * <p>A loader thread reads the records of {@code outlinksFile} and puts them on a bounded
 * queue (capacity 20000). The calling thread drains the queue and, for every record, looks
 * up the page-rank value of the source fingerprint in {@code valueMap}, divides it by the
 * record's url count (at least 1) and writes the result to the output stream selected by
 * the target fingerprint's hash modulo {@code nodeCount}. The loader signals the end of the
 * data with an item whose fingerprints are null, and signals failure with an item carrying
 * the exception.
 *
 * @param valueMap map queried for the page-rank value of each source fingerprint
 * @param outlinksFile sequence file holding the outlink records
 * @param outlinksIsRemote if true the file is opened through
 *        {@code CrawlEnvironment.getDefaultFileSystem()}, otherwise through the local filesystem
 * @param localOutputDir passed to {@code buildDistributionOutputStreamVector}
 * @param remoteOutputDir passed to {@code buildDistributionOutputStreamVector}
 * @param thisNodeIdx index of this node; used for the output base name and passed to
 *        {@code buildDistributionOutputStreamVector}
 * @param nodeCount number of output partitions; also the modulus for routing a record
 * @param iterationNumber used for the output base name
 * @param progressCallback optional (may be null); polled every 10000 loaded items, and a
 *        {@code true} return from {@code updateProgress} cancels the load
 * @throws IOException if the loader reports an error (including cancellation) or writing fails;
 *         the exception is logged before it is rethrown
 */
public static void distributeRank(final PRValueMap valueMap, final Path outlinksFile,
        final boolean outlinksIsRemote, File localOutputDir, String remoteOutputDir, int thisNodeIdx,
        int nodeCount, int iterationNumber, final ProgressAndCancelCheckCallback progressCallback)
        throws IOException {

    final Configuration conf = CrawlEnvironment.getHadoopConfig();

    // filled in by buildDistributionOutputStreamVector below; indexed by target node
    Vector<PRValueOutputStream> outputStreamVector = new Vector<PRValueOutputStream>();

    // bounded hand-off queue between the loader thread (producer) and this thread (consumer)
    final LinkedBlockingQueue<OutlinkItem> queue = new LinkedBlockingQueue<OutlinkItem>(20000);

    try {

        // start the loader thread ...
        Thread loaderThread = new Thread(new Runnable() {

            // reusable buffers for the raw key/value bytes of each record
            final BytesWritable key = new BytesWritable();
            final BytesWritable value = new BytesWritable();

            // reusable streams positioned over the key/value bytes for decoding
            final DataInputBuffer keyStream = new DataInputBuffer();
            final DataInputBuffer valueStream = new DataInputBuffer();

            @Override
            public void run() {
                LOG.info("Opening Outlinks File at:" + outlinksFile);
                SequenceFile.Reader reader = null;
                try {

                    // pick the filesystem the outlinks file lives on
                    FileSystem fsForOutlinksFile = null;
                    if (outlinksIsRemote) {
                        fsForOutlinksFile = CrawlEnvironment.getDefaultFileSystem();
                    } else {
                        fsForOutlinksFile = FileSystem.getLocal(conf);
                    }

                    // total file length; the denominator of the progress fraction below
                    long bytesToReadTotal = fsForOutlinksFile.getLength(outlinksFile);

                    reader = new SequenceFile.Reader(fsForOutlinksFile, outlinksFile, conf);
                    OutlinkItem item = new OutlinkItem();
                    int itemCount = 0;
                    boolean isCancelled = false;
                    while (!isCancelled && reader.next(key, value)) {

                        keyStream.reset(key.get(), 0, key.getLength());
                        valueStream.reset(value.get(), 0, value.getLength());

                        //populate item from data
                        readURLFPFromStream(keyStream, item.targetFingerprint);
                        item.urlCount = readURLFPAndCountFromStream(valueStream, item.sourceFingerprint);

                        try {
                            // put() blocks while the queue is full; the two timestamps are
                            // taken around it but are not used afterwards
                            long blockTimeStart = System.currentTimeMillis();
                            queue.put(item);
                            long blockTimeEnd = System.currentTimeMillis();
                        } catch (InterruptedException e) {
                            // NOTE(review): interrupt is ignored and the interrupt flag is
                            // not restored; the item may not have been queued
                        }
                        // the queued item now belongs to the consumer; use a fresh one
                        item = new OutlinkItem();

                        // every 10000 items report progress and check for cancellation
                        if (itemCount++ % 10000 == 0 && progressCallback != null) {

                            float percentComplete = (float) reader.getPosition() / (float) bytesToReadTotal;
                            if (progressCallback.updateProgress(percentComplete)) {
                                LOG.info("Cancel check callback returned true.Cancelling outlink item load");
                                isCancelled = true;
                            }
                        }
                    }
                    // null fingerprints mark the item as the end-of-data sentinel
                    item.sourceFingerprint = null;
                    item.targetFingerprint = null;

                    // queue the sentinel, or an error item if the load was cancelled
                    try {
                        if (!isCancelled) {
                            queue.put(item);
                        } else {
                            queue.put(new OutlinkItem(new IOException("Operation Cancelled")));
                        }
                    } catch (InterruptedException e) {
                        // NOTE(review): if this put is interrupted the consumer never sees
                        // a terminating item
                    }

                } catch (IOException e) {
                    // add error item to queue.
                    try {
                        queue.put(new OutlinkItem(e));
                    } catch (InterruptedException e1) {
                    }
                } finally {
                    if (reader != null)
                        try {
                            reader.close();
                        } catch (IOException e) {
                            // failure to close the reader is ignored
                        }
                }
            }

        });

        loaderThread.start();

        // first things first ... initialize output stream vector
        FileSystem fileSystem = buildDistributionOutputStreamVector(true,
                getOutlinksBaseName(thisNodeIdx, iterationNumber), localOutputDir, remoteOutputDir, thisNodeIdx,
                nodeCount, outputStreamVector);

        try {
            // open outlinks file .
            LOG.info("Iterating Items in Outlinks File and Writing Test Value");

            int itemCount = 0;
            int totalOutlinkCount = 0;
            // NOTE(review): only ever reset to 0 in this method, never incremented, so the
            // outlink counts that get logged are always 0
            int iterationOutlinkCount = 0;
            long iterationStart = System.currentTimeMillis();
            long timeStart = iterationStart;

            boolean done = false;

            ArrayList<OutlinkItem> items = new ArrayList<OutlinkItem>();
            // start iterating outlinks
            while (!done) {

                //OutlinkItem item = null;

                //try {
                // NOTE(review): drainTo() does not block, so this loop spins while the
                // queue is empty; the timestamps around it are not used afterwards
                long waitTimeStart = System.currentTimeMillis();
                queue.drainTo(items);
                long waitTimeEnd = System.currentTimeMillis();
                //} catch (InterruptedException e) {
                //}

                for (OutlinkItem item : items) {
                    if (item.error != null) {
                        // loader failed or was cancelled: propagate its exception
                        // NOTE(review): this path skips loaderThread.join() below
                        LOG.info(
                                "Loader Thread Returned Error:" + CCStringUtils.stringifyException(item.error));
                        throw item.error;
                    } else if (item.sourceFingerprint == null) {
                        // end-of-data sentinel from the loader
                        LOG.info("Loader Thread Indicated EOF via emtpy item");
                        done = true;
                    } else {
                        ++itemCount;

                        /*
                        LOG.info("SourceFP-DomainHash:" + item.sourceFingerprint.getDomainHash() + " URLHash:" + item.sourceFingerprint.getUrlHash() 
                              + " PartitionIdx:" + ((item.sourceFingerprint.hashCode() & Integer.MAX_VALUE) % CrawlEnvironment.PR_NUMSLAVES) );
                        */

                        // now get pr value for fingerprint (random seek in memory here!!!)
                        // the value is split across the url count; max(..., 1) avoids dividing by zero
                        float prValue = valueMap.getPRValue(item.sourceFingerprint)
                                / (float) Math.max(item.urlCount, 1);

                        // write value out
                        // masking with Integer.MAX_VALUE clears the sign bit so the index is non-negative
                        int nodeIndex = (item.targetFingerprint.hashCode() & Integer.MAX_VALUE) % nodeCount;
                        outputStreamVector.get(nodeIndex).writePRValue(item.targetFingerprint,
                                item.sourceFingerprint, prValue);

                        // log throughput for every batch of 10000 written items
                        if (itemCount % 10000 == 0) {

                            long timeEnd = System.currentTimeMillis();
                            int milliseconds = (int) (timeEnd - iterationStart);

                            LOG.info("Distribute PR for 10000 Items with:" + iterationOutlinkCount
                                    + " Outlinks Took:" + milliseconds + " Milliseconds" + " QueueCount:"
                                    + queue.size());

                            iterationStart = System.currentTimeMillis();
                            totalOutlinkCount += iterationOutlinkCount;
                            iterationOutlinkCount = 0;
                        }

                    }
                }
                // the drained batch has been processed; reuse the list for the next drain
                items.clear();
            }

            totalOutlinkCount += iterationOutlinkCount;

            LOG.info("Distribute Finished for a total of:" + itemCount + " Items with:" + totalOutlinkCount
                    + " Outlinks Took:" + (System.currentTimeMillis() - timeStart) + " Milliseconds");

            LOG.info("Waiting for Loader Thread to Die");
            try {
                loaderThread.join();
            } catch (InterruptedException e) {
                // NOTE(review): interrupt is ignored and the interrupt flag is not restored
            }
            LOG.info("Loader Thread Died - Moving on...");
        } finally {

            // close every output stream that was created, then the filesystem returned
            // by buildDistributionOutputStreamVector
            for (PRValueOutputStream info : outputStreamVector) {

                if (info != null) {
                    info.close(false);
                }
            }

            if (fileSystem != null) {
                fileSystem.close();
            }
        }
    } catch (IOException e) {
        LOG.error("Exception caught while distributing outlinks:" + CCStringUtils.stringifyException(e));
        throw e;
    }
}