Example usage for java.io RandomAccessFile readFully

Introduction

On this page you can find example usage of java.io.RandomAccessFile.readFully.

Prototype

public final void readFully(byte b[], int off, int len) throws IOException 

Document

Reads exactly len bytes from this file into the byte array, starting at the current file pointer.
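
A minimal sketch of the call in isolation: seek to a record boundary, then read the whole record with readFully. The file name records.dat and the 64-byte record size are made up for illustration; readFully blocks until exactly len bytes are read and throws EOFException if the file ends first.

import java.io.IOException;
import java.io.RandomAccessFile;

public class ReadFullyExample {
    public static void main(String[] args) throws IOException {
        final int RECORD_SIZE = 64; // hypothetical fixed record size
        byte[] record = new byte[RECORD_SIZE];

        // try-with-resources closes the file even if readFully throws
        try (RandomAccessFile file = new RandomAccessFile("records.dat", "r")) {
            // position at the start of the 4th record
            file.seek(3L * RECORD_SIZE);
            // fills the buffer completely or throws EOFException
            file.readFully(record, 0, record.length);
        }
    }
}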

Usage

From source file: org.commoncrawl.service.listcrawler.CrawlList.java

private OnDiskCrawlHistoryItem loadOnDiskItemForURLFP(URLFP fingerprint) throws IOException {

    // see if state is cached in memory ...
    boolean loadedFromMemory = false;

    synchronized (this) {
        if (_tempFixedDataBuffer != null) {

            loadedFromMemory = true;

            int low = 0;
            int high = (int) (_tempFixedDataBufferSize / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;

            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();
            DataInputBuffer inputBuffer = new DataInputBuffer();

            int iterationNumber = 0;

            while (low <= high) {

                ++iterationNumber;

                int mid = low + ((high - low) / 2);

                inputBuffer.reset(_tempFixedDataBuffer, 0, _tempFixedDataBufferSize);
                inputBuffer.skip(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);

                // deserialize 
                itemOut.deserialize(inputBuffer);

                // now compare it against desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);

                if (comparisonResult > 0)
                    high = mid - 1;
                else if (comparisonResult < 0)
                    low = mid + 1;
                else {

                    // cache offset 
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;

                    // LOG.info("Found Match. Took:"+ iterationNumber + " iterations");
                    // and return item 
                    return itemOut;
                }
            }
            //LOG.error("Did Not Find Match For Domain:" + fingerprint.getDomainHash() + " URLFP:" + fingerprint.getUrlHash() + " Took:" + iterationNumber + " iterations");
        }
    }

    if (!loadedFromMemory) {
        //load from disk 

        //LOG.info("Opening Data File for OnDiskItem load for Fingerprint:" + fingerprint.getUrlHash());

        RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw");

        // allocate buffer upfront 
        byte[] onDiskItemBuffer = new byte[OnDiskCrawlHistoryItem.ON_DISK_SIZE];
        DataInputBuffer inputStream = new DataInputBuffer();

        //LOG.info("Opened Data File. Searching for match");
        try {

            int low = 0;
            int high = (int) (file.length() / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;

            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();

            int iterationNumber = 0;

            while (low <= high) {

                ++iterationNumber;

                int mid = low + ((high - low) / 2);

                // seek to proper location 
                file.seek(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                // read the data structure 
                file.readFully(onDiskItemBuffer, 0, onDiskItemBuffer.length);
                // map location in file 
                //MappedByteBuffer memoryBuffer = file.getChannel().map(MapMode.READ_ONLY,mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE,OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                //DataInputStream  inputStream = new DataInputStream(new ByteBufferInputStream(memoryBuffer));
                inputStream.reset(onDiskItemBuffer, 0, OnDiskCrawlHistoryItem.ON_DISK_SIZE);

                // deserialize 
                itemOut.deserialize(inputStream);

                // memoryBuffer = null;
                //inputStream = null;

                // now compare it against desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);

                if (comparisonResult > 0)
                    high = mid - 1;
                else if (comparisonResult < 0)
                    low = mid + 1;
                else {

                    // cache offset 
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;

                    // LOG.info("Found Match. Took:"+ iterationNumber + " iterations");
                    // and return item 
                    return itemOut;
                }
            }
            //LOG.error("******Did Not Find Match For Domain:" + fingerprint.getDomainHash() + " URLFP:" + fingerprint.getUrlHash() + " Took:" + iterationNumber + " iterations");

            //DEBUG ONLY !
            // dumpFixedDataFile();
        } finally {
            file.close();
        }
    }
    return null;
}
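
The example above binary-searches a file of sorted, fixed-size records: seek to mid * ON_DISK_SIZE, readFully one record, deserialize it, compare fingerprints, then halve the range. Below is a stripped-down sketch of the same pattern, with a plain 8-byte long key standing in for the project's URLFP comparison; the record layout and the findRecord helper are hypothetical.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;

public class FixedRecordSearch {

    static final int RECORD_SIZE = 16; // hypothetical: 8-byte key followed by an 8-byte payload

    // Binary search over sorted fixed-size records; returns the matching record's
    // file offset, or -1 if the key is not present.
    static long findRecord(RandomAccessFile file, long targetKey) throws IOException {
        byte[] record = new byte[RECORD_SIZE];
        long low = 0;
        long high = file.length() / RECORD_SIZE - 1;

        while (low <= high) {
            long mid = low + (high - low) / 2;

            // jump straight to the mid record and read it in full
            file.seek(mid * RECORD_SIZE);
            file.readFully(record, 0, record.length);

            // the first 8 bytes of each record hold the sort key
            long key = ByteBuffer.wrap(record).getLong();

            if (key > targetKey) {
                high = mid - 1;
            } else if (key < targetKey) {
                low = mid + 1;
            } else {
                return mid * RECORD_SIZE; // found: offset of the matching record
            }
        }
        return -1;
    }
}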

From source file: org.commoncrawl.service.listcrawler.CrawlList.java

public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) {
    synchronized (_metadata) {

        ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>();

        try {
            synchronized (_subDomainMetadataFile) {
                RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
                DataInputBuffer inputBuffer = new DataInputBuffer();
                byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

                try {
                    // skip version 
                    file.read();
                    // read item count 
                    int itemCount = file.readInt();

                    int i = offset;
                    int end = Math.min(i + count, itemCount);

                    LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

                    if (i < itemCount) {

                        file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset));

                        CrawlListMetadata newMetadata = new CrawlListMetadata();

                        for (; i < end; ++i) {

                            long originalPos = file.getFilePointer();
                            file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                            inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                            newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                            itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata));
                        }
                    }
                } finally {
                    file.close();
                }
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");

        return itemsOut;
    }
}
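
The example above skips a small header (one version byte plus a 4-byte item count), seeks once to the requested offset, and then lets each readFully call advance the file pointer one fixed-size block at a time. A stripped-down sketch of that sequential pattern follows, with a hypothetical block size and no deserialization step.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;

public class FixedBlockScan {

    static final int BLOCK_SIZE = 128; // hypothetical fixed block size
    static final int HEADER_SIZE = 5;  // 1 version byte + 4-byte item count, as in the example above

    // Reads up to count fixed-size blocks starting at the given block index.
    static List<byte[]> readBlocks(RandomAccessFile file, int offset, int count) throws IOException {
        List<byte[]> blocks = new ArrayList<byte[]>();

        file.seek(0);
        file.read();                    // skip the version byte
        int itemCount = file.readInt(); // total number of blocks in the file

        int end = Math.min(offset + count, itemCount);
        if (offset < itemCount) {
            // position once, then let each readFully advance the file pointer
            file.seek(HEADER_SIZE + (long) BLOCK_SIZE * offset);
            for (int i = offset; i < end; ++i) {
                byte[] block = new byte[BLOCK_SIZE];
                file.readFully(block, 0, block.length); // whole block or EOFException
                blocks.add(block);
            }
        }
        return blocks;
    }
}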