List of usage examples for java.io RandomAccessFile getFilePointer
public native long getFilePointer() throws IOException;
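Before the project examples below, a minimal self-contained sketch of the method's behavior (the file name demo.bin is arbitrary): the pointer starts at 0, every read or write advances it by the number of bytes transferred, and seek() repositions it.

import java.io.IOException;
import java.io.RandomAccessFile;

public class FilePointerDemo {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the file even if an exception is thrown
        try (RandomAccessFile raf = new RandomAccessFile("demo.bin", "rw")) {
            System.out.println(raf.getFilePointer()); // 0 on a fresh handle
            raf.writeInt(42);                         // writing 4 bytes advances the pointer
            System.out.println(raf.getFilePointer()); // 4
            raf.seek(0);                              // jump back to the start
            System.out.println(raf.readInt());        // 42; the read advances the pointer again
            System.out.println(raf.getFilePointer()); // 4
        }
    }
}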
From source file:big.BigZip.java
/**
 * Given a position inside our knowledge base, retrieve the data up to
 * the next file indicator.
 * @param targetFile The new file that will be created
 * @param startPosition The position from where we start to read the data
 * @param endPosition The position where the data for this file ends
 * @return true if the extraction succeeded, false otherwise
 */
public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
    /**
     * This is a tricky method. We will be extracting data from the BIG
     * archive onto a new file somewhere on disk. The biggest challenge here
     * is to find exactly when the data for the file ends and still do the
     * file copy with a wonderful performance.
     */
    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // if the target file exists, try to delete it
        if (targetFile.exists()) {
            targetFile.delete();
            if (targetFile.exists()) {
                // we failed completely
                System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                return false;
            }
        }
        // we need to create a temporary zip file holder
        File fileZip = new File("temp.zip");
        // delete the zip file if it already exists
        if (fileZip.exists()) {
            fileZip.delete();
            if (fileZip.exists()) {
                // we failed completely
                System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                return false;
            }
        }
        // create a new file
        RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            // write the same byte on the target file
            dataNew.write(data);
        }
        // close the file streams
        dataBIG.close();
        dataNew.close();
        // extract the file
        zip.extract(fileZip, new File("."));
        // delete the temp zip file
        fileZip.delete();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }
    return true;
}
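A hedged aside on the copy loop above: reading one byte at a time is slow because every read() goes to the underlying file. A buffered variant keeps getFilePointer() as the loop bound while moving data in chunks. This is a sketch, not part of BigZip; copyRange is a hypothetical helper and the 64 KB buffer size is an arbitrary choice.

import java.io.IOException;
import java.io.RandomAccessFile;

final class CopyRangeSketch {
    // copies the byte range [in.getFilePointer(), endPosition) from in to out in chunks
    static void copyRange(RandomAccessFile in, RandomAccessFile out, long endPosition) throws IOException {
        byte[] buffer = new byte[64 * 1024]; // arbitrary chunk size
        while (in.getFilePointer() < endPosition) {
            // never read past the end of the requested range
            int toRead = (int) Math.min(buffer.length, endPosition - in.getFilePointer());
            in.readFully(buffer, 0, toRead); // advances the pointer by toRead bytes
            out.write(buffer, 0, toRead);
        }
    }
}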
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Resubmit failed items.
 *
 * @param loader
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    queueItem = (item._redirectStatus != 0);
                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    queueItem = (item._crawlStatus != 0);
                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }
                if (queueItem) {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();
                    // and spill
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);
                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
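The loop above relies on a recurring idiom in these examples: iterate over fixed-size records until getFilePointer() reaches length(). A stripped-down sketch of that idiom, assuming a hypothetical file of back-to-back (long id, int value) records with no trailing partial record:

import java.io.IOException;
import java.io.RandomAccessFile;

public class RecordScanSketch {
    public static void scan(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "r")) {
            // each iteration consumes exactly one 12-byte record
            while (raf.getFilePointer() < raf.length()) {
                long recordStart = raf.getFilePointer(); // offset of this record
                long id = raf.readLong();
                int value = raf.readInt();
                System.out.println("record @" + recordStart + " id=" + id + " value=" + value);
            }
        }
    }
}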
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Queue uncrawled urls via the CrawlQueueLoader.
 *
 * @param loader
 */
public void queueUnCrawledItems(CrawlQueueLoader loader) throws IOException {
    _queueState = QueueState.QUEUEING;

    int metadataVersion = getMetadata().getVersion();

    synchronized (_metadata) {
        // reset metadata PERIOD
        int urlCount = _metadata.getUrlCount();
        _metadata.clear();
        _metadata.setUrlCount(urlCount);
    }

    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            long position = fixedDataReader.getFilePointer();
            //LOG.info("*** TRYING READ LOCK FOR OFFSET:" + position);
            while (true) {
                // get read lock on position ...
                try {
                    FileLock lock = fixedDataReader.getChannel().tryLock(position,
                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                    try {
                        //LOG.info("*** GOT READ LOCK FOR OFFSET:" + position);
                        item.deserialize(fixedDataReader);
                        break;
                    } finally {
                        lock.release();
                        //LOG.info("*** RELEASED READ LOCK FOR OFFSET:" + position);
                    }
                } catch (OverlappingFileLockException e) {
                    LOG.error("*** LOCK CONTENTION AT:" + position + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
            }

            // seek to string data
            stringDataReader.seek(item._stringsOffset);
            // and skip buffer length
            WritableUtils.readVInt(stringDataReader);
            // and read primary string
            String url = stringDataReader.readUTF();
            // setup fingerprint
            fingerprint.setDomainHash(item._domainHash);
            fingerprint.setUrlHash(item._urlFingerprint);

            // first, if it has not been crawled ever, crawl it no matter what ...
            boolean crawlItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

            // if it has been crawled ... check list metadata version ...
            if (!crawlItem && metadataVersion >= 1) {
                // ok this is a newer version of the list ...
                // check refresh time if specified ...
                int refreshIntervalInSeconds = DEFAULT_REFRESH_INTERVAL_IN_SECS;
                if (getMetadata().getRefreshInterval() != 0) {
                    refreshIntervalInSeconds = getMetadata().getRefreshInterval();
                }
                if (item._updateTimestamp > 0) {
                    long timeSinceLastCrawl = item._updateTimestamp;
                    // long arithmetic (1000L) avoids int overflow for large refresh intervals
                    if (System.currentTimeMillis() - timeSinceLastCrawl >= (refreshIntervalInSeconds * 1000L)) {
                        crawlItem = true;
                    }
                }
            }

            if (crawlItem) {
                loader.queueURL(fingerprint, url);
                synchronized (_metadata) {
                    // update queued item count
                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() + 1);
                }
            } else {
                updateMetadata(item, _metadata, 0);
            }
            // ok update subdomain stats
            updateSubDomainMetadataForItemDuringLoad(item, url, fingerprint, crawlItem);
        }

        flushCachedSubDomainMetadata();

        loader.flush();

        _queueState = QueueState.QUEUED;
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.ERROR;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptorTest.java
private int getVepAnnotationBatch(RandomAccessFile raf, int nVariantsToRead,
        Set<AnnotationComparisonObject> vepAnnotationSet) throws IOException {
    /**
     * Loads VEP annotation
     */
    String newLine;
    int nNonRegulatoryAnnotations = 0;
    int nReadVariants = 0;
    String previousChr = "";
    String previousPosition = "";
    String previousAlt = "";
    String alt;
    long filePointer = 0;

    if (nVariantsToRead > 0) {
        while (((newLine = raf.readLine()) != null) && nReadVariants <= nVariantsToRead) {
            String[] lineFields = newLine.split("\t");
            String[] coordinatesParts = lineFields[1].split(":");
            if (lineFields[2].equals("deletion")) {
                alt = "-";
            } else {
                alt = lineFields[2];
            }
            if (!previousChr.equals(coordinatesParts[0]) || !previousPosition.equals(coordinatesParts[1])
                    || !previousAlt.equals(alt)) {
                nReadVariants++;
            }
            if (nReadVariants <= nVariantsToRead) {
                for (String SOname : lineFields[6].split(",")) {
                    if (SOname.equals("nc_transcript_variant")) {
                        SOname = "non_coding_transcript_variant";
                    }
                    if (!SOname.equals("regulatory_region_variant")) {
                        nNonRegulatoryAnnotations++;
                    }
                    vepAnnotationSet.add(new AnnotationComparisonObject(coordinatesParts[0],
                            coordinatesParts[1], alt, lineFields[3], lineFields[4], SOname));
                }
                previousChr = coordinatesParts[0];
                previousPosition = coordinatesParts[1];
                previousAlt = alt;
                filePointer = raf.getFilePointer();
            }
        }
        raf.seek(filePointer);
    }
    return nNonRegulatoryAnnotations;
}
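One detail worth calling out above: the loop necessarily reads one line past the requested batch, so the method saves getFilePointer() after each accepted line and seeks back to that saved offset before returning, effectively un-reading the overshoot so the next call resumes at the right line. A minimal hedged sketch of that push-back idiom (the prefix predicate is invented for illustration):

import java.io.IOException;
import java.io.RandomAccessFile;

public class LinePushBackSketch {
    /** Consumes consecutive lines starting with prefix; leaves the pointer
     *  at the beginning of the first line that does not match. */
    public static int readWhilePrefixed(RandomAccessFile raf, String prefix) throws IOException {
        long lastAccepted = raf.getFilePointer();
        int matched = 0;
        String line;
        while ((line = raf.readLine()) != null) {
            if (!line.startsWith(prefix)) {
                break; // overshot: this line belongs to the next caller
            }
            matched++;
            lastAccepted = raf.getFilePointer(); // offset just past the accepted line
        }
        raf.seek(lastAccepted); // un-read the non-matching line
        return matched;
    }
}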
From source file:org.commoncrawl.service.listcrawler.HDFSFlusherThread.java
private long generateSequenceFileAndIndex(int itemFlushLimit, RandomAccessFile sourceLogFile, long startPos,
        long endPos, byte[] syncBytes, SequenceFile.Writer writer, DataOutput indexStreamOut,
        ArrayList<FingerprintAndOffsetTuple> tupleListOut) throws IOException {

    byte[] syncCheck = new byte[syncBytes.length];

    // and create a list to hold fingerprint / offset information
    Vector<FingerprintAndOffsetTuple> fpOffsetList = new Vector<FingerprintAndOffsetTuple>();

    long currentPos = startPos;

    LOG.info("Flushing Entries Starting up to offset:" + endPos);
    CacheItemHeader itemHeader = new CacheItemHeader();
    int itemsProcessed = 0;

    boolean ignoreFlushLimit = false;

    // start read
    while (currentPos < endPos) {
        if ((endPos - currentPos) < LocalLogFileHeader.SYNC_BYTES_SIZE)
            break;

        // seek to current position ...
        sourceLogFile.seek(currentPos);

        boolean headerLoadFailed = false;

        try {
            // read the item header ... assuming things are good so far ...
            itemHeader.readHeader(sourceLogFile);
        } catch (IOException e) {
            CacheManager.LOG.error("### Item Header Load At Position:" + currentPos
                    + " Failed With Exception:" + CCStringUtils.stringifyException(e));
            headerLoadFailed = true;
        }

        if (headerLoadFailed) {
            CacheManager.LOG
                    .error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");
            currentPos += LocalLogFileHeader.SYNC_BYTES_SIZE;
        }

        // if header sync bytes don't match .. then seek to next sync position ...
        if (headerLoadFailed || !Arrays.equals(itemHeader._sync, syncBytes)) {
            CacheManager.LOG
                    .error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");

            // reseek to current pos
            sourceLogFile.seek(currentPos);
            // read in a sync.length buffer amount
            sourceLogFile.readFully(syncCheck);

            int syncLen = syncBytes.length;

            // start scan for next sync position ...
            for (int i = 0; sourceLogFile.getFilePointer() < endPos; i++) {
                int j = 0;
                for (; j < syncLen; j++) {
                    if (syncBytes[j] != syncCheck[(i + j) % syncLen])
                        break;
                }
                if (j == syncLen) {
                    // position before sync
                    sourceLogFile.seek(sourceLogFile.getFilePointer() - LocalLogFileHeader.SYNC_BYTES_SIZE);
                    break;
                }
                syncCheck[i % syncLen] = sourceLogFile.readByte();
            }
            // whatever happened, the file pointer is now at the current pos
            currentPos = sourceLogFile.getFilePointer();

            if (currentPos < endPos) {
                CacheManager.LOG.info("### Item Loader Found another sync point at:" + currentPos);
            } else {
                CacheManager.LOG.error("### No more sync points found!");
            }
        } else {
            CacheManager.LOG
                    .info("WritingItem with FP:" + itemHeader._fingerprint + " Pos Is:" + writer.getLength());

            // track offset information for index building purposes
            fpOffsetList.add(new FingerprintAndOffsetTuple(itemHeader._fingerprint, writer.getLength()));

            // read item data ...
            CacheItem cacheItem = new CacheItem();
            cacheItem.readFields(sourceLogFile);

            // now read content length
            int contentLength = sourceLogFile.readInt();

            // and if content present ... allocate buffer
            if (contentLength != 0) {
                // allocate content buffer
                byte[] contentBuffer = new byte[contentLength];
                // read it from disk
                sourceLogFile.readFully(contentBuffer);
                // and set content into cache item
                cacheItem.setContent(new Buffer(contentBuffer));
            }
            CacheManager.LOG.info("Adding to Sequence File Item with URL:" + cacheItem.getUrl());
            // write to sequence file ...
            writer.append(new Text(cacheItem.getUrl()), cacheItem);
            // now seek past data
            currentPos += CacheItemHeader.SIZE + itemHeader._dataLength + CacheManager.ITEM_RECORD_TRAILING_BYTES;
            // increment item count
            itemsProcessed++;
        }

        if (!ignoreFlushLimit && itemsProcessed >= itemFlushLimit) {
            // ok this gets tricky now ...
            // figure out how many bytes of data were required to get to flush limit
            long approxCheckpointSize = currentPos - startPos;
            // compute a threshold number
            long bytesThreshold = (long) (approxCheckpointSize * .70);
            // compute bytes remaining in checkpoint file ...
            long bytesRemaining = endPos - currentPos;

            // ok if bytes remaining are less than threshold number then go ahead and gobble
            // everything up in a single pass (to prevent a smaller subsequent index)
            if (bytesRemaining <= bytesThreshold) {
                // ignore the flush limit and keep on rolling to the end ...
                ignoreFlushLimit = true;
                LOG.warn("*****Bytes Remaining:" + bytesRemaining + " less than % of last whole chkpt size:"
                        + approxCheckpointSize + ". Bypassing Flush Limit");
            } else {
                LOG.info("Reached Flush Item Limit:" + itemsProcessed + " Breaking Out");
                break;
            }
        }
    }

    LOG.info("Writing Index");
    // ok now build the index file ...
    HDFSFileIndex.writeIndex(fpOffsetList, indexStreamOut);
    LOG.info("Done Writing Index. Total Items Written:" + fpOffsetList.size());
    // copy offset list into tuple list
    tupleListOut.addAll(fpOffsetList);

    return currentPos;
}
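The corruption-recovery branch above resynchronizes on a sync marker using a rolling modular buffer, which is efficient but hard to read. A simpler (byte-at-a-time, so slower) hedged sketch of the same idea, reporting the marker's offset via getFilePointer():

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Arrays;

public class SyncScanSketch {
    /** Returns the offset of the next occurrence of marker at or after the current
     *  pointer, or -1 if no full marker starts before endPos. */
    public static long findNextSync(RandomAccessFile raf, byte[] marker, long endPos) throws IOException {
        byte[] window = new byte[marker.length];
        while (raf.getFilePointer() + marker.length <= endPos) {
            long candidate = raf.getFilePointer(); // where this window starts
            raf.readFully(window);
            if (Arrays.equals(window, marker)) {
                return candidate; // pointer is now positioned just past the marker
            }
            raf.seek(candidate + 1); // slide the window forward by one byte
        }
        return -1;
    }
}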
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void resetSubDomainCounts() throws IOException {
    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");

    if (_subDomainMetadataFile.exists()) {
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            for (int i = 0; i < itemCount; ++i) {
                long originalPos = file.getFilePointer();
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                // ok reset everything except hashes and first/last url pointers
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();

                // reset
                newMetadata.clear();
                // restore
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);

                // serialize it ...
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());

                // write it back to disk
                file.seek(originalPos);
                // and rewrite it ...
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE RESETTING SUBDOMAIN METADATA QUEUE COUNTS");
    }
}
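The rewrite loop above is an instance of the in-place update pattern: capture getFilePointer() before reading a fixed-size record, modify the record, seek back to the captured offset, and overwrite. A self-contained hedged sketch with an invented record layout (a long id followed by an int counter):

import java.io.IOException;
import java.io.RandomAccessFile;

public class InPlaceUpdateSketch {
    /** Zeroes the counter field of every 12-byte (long id, int counter) record. */
    public static void resetCounters(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "rw")) {
            while (raf.getFilePointer() + 12 <= raf.length()) {
                long recordStart = raf.getFilePointer(); // capture before reading
                long id = raf.readLong();
                raf.readInt();                 // consume the old counter
                raf.seek(recordStart);         // rewind to the start of the record
                raf.writeLong(id);             // rewrite the id unchanged
                raf.writeInt(0);               // reset the counter; pointer is now at the next record
            }
        }
    }
}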
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ... ");
    if (_subDomainMetadataFile.exists()) {
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS. LOADING SUBDOMAIN DATA FROM DISK.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {
                long originalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) originalPos);
            }

            // write lookup table
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);

            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");

        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");

        try {
            // ok rebuild top level metadata as well
            _metadata.clear();

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();

            int processedCount = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                long position = fixedDataReader.getFilePointer();

                // store offset in item
                item._fileOffset = position;

                // load from disk
                item.deserialize(fixedDataReader);

                try {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();

                    // get metadata object for subdomain
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);

                    // increment url count
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);

                    // increment top level metadata count
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);

                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);

                    // update sub-domain metadata object from item data
                    updateMetadata(item, subDomainMetadata, 0);

                    ++processedCount;
                } catch (IOException e) {
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }

                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }

            // ok commit top level metadata to disk as well
            writeMetadataToDisk();
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA. WRITING TO DISK");
        // write metadata to disk
        writeInitialSubDomainMetadataToDisk();
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA. WRITE COMPLETE");
    }
}
From source file:org.opencb.cellbase.lib.db.VariantAnnotationCalculatorTest.java
private int getVepAnnotationBatch(RandomAccessFile raf, int nVariantsToRead,
        Set<AnnotationComparisonObject> vepAnnotationSet) throws IOException {
    /**
     * Loads VEP annotation
     */
    String newLine;
    int nNonRegulatoryAnnotations = 0;
    int nReadVariants = 0;
    String previousChr = "";
    String previousPosition = "";
    String previousAlt = "";
    String alt;
    long filePointer = 0;

    if (nVariantsToRead > 0) {
        while (((newLine = raf.readLine()) != null) && nReadVariants <= nVariantsToRead) {
            String[] lineFields = newLine.split("\t");
            String[] coordinatesParts = lineFields[1].split(":");
            if (lineFields[2].equals("deletion")) {
                alt = "-";
            } else {
                alt = lineFields[2];
            }
            // TODO: Remove this if as refactoring implements consequence types for other variant types
            // if(!alt.equals("-") && coordinatesParts[1].split("-").length==1) {
            if (!previousChr.equals(coordinatesParts[0]) || !previousPosition.equals(coordinatesParts[1])
                    || !previousAlt.equals(alt)) {
                nReadVariants++;
            }
            if (nReadVariants <= nVariantsToRead) {
                for (String SOname : lineFields[6].split(",")) {
                    if (SOname.equals("nc_transcript_variant")) {
                        SOname = "non_coding_transcript_variant";
                    }
                    if (!SOname.equals("regulatory_region_variant")) {
                        nNonRegulatoryAnnotations++;
                    }
                    vepAnnotationSet.add(new AnnotationComparisonObject(coordinatesParts[0],
                            coordinatesParts[1], alt, lineFields[3], lineFields[4], SOname));
                }
                previousChr = coordinatesParts[0];
                previousPosition = coordinatesParts[1];
                previousAlt = alt;
                filePointer = raf.getFilePointer();
            }
            // }
        }
        raf.seek(filePointer);
    }
    return nNonRegulatoryAnnotations;
}
From source file:io.minio.MinioClient.java
/**
 * Skips data of up to given length in given input stream.
 *
 * @param inputStream Input stream which is an instance of {@link RandomAccessFile} or {@link BufferedInputStream}.
 * @param n Length of bytes to skip.
 */
private void skipStream(Object inputStream, long n) throws IOException, InsufficientDataException {
    RandomAccessFile file = null;
    BufferedInputStream stream = null;
    if (inputStream instanceof RandomAccessFile) {
        file = (RandomAccessFile) inputStream;
    } else if (inputStream instanceof BufferedInputStream) {
        stream = (BufferedInputStream) inputStream;
    } else {
        throw new IllegalArgumentException("unsupported input stream object");
    }

    if (file != null) {
        // a RandomAccessFile can skip in a single hop: advance the pointer by n
        file.seek(file.getFilePointer() + n);
        return;
    }

    long bytesSkipped;
    long totalBytesSkipped = 0;

    // loop on "> 0" rather than ">= 0": skip() returns 0 at end of stream,
    // and looping on ">= 0" would spin forever instead of falling through
    // to the exception below
    while ((bytesSkipped = stream.skip(n - totalBytesSkipped)) > 0) {
        totalBytesSkipped += bytesSkipped;
        if (totalBytesSkipped == n) {
            return;
        }
    }

    throw new InsufficientDataException(
            "Insufficient data. bytes skipped " + totalBytesSkipped + " expected " + n);
}
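A note on the design choice behind the two branches above: for a RandomAccessFile, skipping is a single pointer move, so seek(getFilePointer() + n) costs the same for any n (and RandomAccessFile permits seeking past the current end of file). InputStream.skip(), by contrast, is only best-effort and may skip fewer bytes than requested, which is why the stream branch must loop and accumulate totalBytesSkipped until it reaches n.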
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) {
    synchronized (_metadata) {
        ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>();

        try {
            synchronized (_subDomainMetadataFile) {
                RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
                DataInputBuffer inputBuffer = new DataInputBuffer();
                byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

                try {
                    // skip version
                    file.read();
                    // read item count
                    int itemCount = file.readInt();

                    int i = offset;
                    int end = Math.min(i + count, itemCount);

                    LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

                    if (i < itemCount) {
                        // skip the 5-byte header (1 version byte + 4-byte item count)
                        // plus all fixed-size records before the requested offset
                        file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset));

                        CrawlListMetadata newMetadata = new CrawlListMetadata();

                        for (; i < end; ++i) {
                            long originalPos = file.getFilePointer();
                            file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                            inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                            newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                            itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata));
                        }
                    }
                } finally {
                    file.close();
                }
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");

        return itemsOut;
    }
}