List of usage examples for java.io RandomAccessFile getFilePointer
public native long getFilePointer() throws IOException;
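Before the project examples below, a minimal self-contained sketch of the method's behavior (the file name demo.bin is arbitrary): the pointer starts at 0, every read or write advances it by the number of bytes transferred, and seek() repositions it.

import java.io.IOException;
import java.io.RandomAccessFile;

public class FilePointerDemo {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the file even if an exception is thrown
        try (RandomAccessFile raf = new RandomAccessFile("demo.bin", "rw")) {
            System.out.println(raf.getFilePointer()); // 0 on a fresh handle
            raf.writeInt(42);                         // writing 4 bytes advances the pointer
            System.out.println(raf.getFilePointer()); // 4
            raf.seek(0);                              // jump back to the start
            System.out.println(raf.readInt());        // 42; the read advances the pointer again
            System.out.println(raf.getFilePointer()); // 4
        }
    }
}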
From source file:big.BigZip.java
/**
 * Given a position inside our knowledge base, retrieve the data up to
 * the next file indicator.
 * @param targetFile The new file that will be created
 * @param startPosition The position from where we start to read the data
 * @param endPosition The position where the data for this file ends
 * @return true if the extraction succeeded, false otherwise
 */
public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
    /**
     * This is a tricky method. We will be extracting data from the BIG
     * archive onto a new file somewhere on disk. The biggest challenge here
     * is to find exactly when the data for the file ends and still do the
     * file copy with a wonderful performance.
     */
    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // if the target file exists, try to delete it
        if (targetFile.exists()) {
            targetFile.delete();
            if (targetFile.exists()) {
                // we failed completely
                System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                return false;
            }
        }
        // we need to create a temporary zip file holder
        File fileZip = new File("temp.zip");
        // delete the zip file if it already exists
        if (fileZip.exists()) {
            fileZip.delete();
            if (fileZip.exists()) {
                // we failed completely
                System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                return false;
            }
        }
        // create a new file
        RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            // write the same byte on the target file
            dataNew.write(data);
        }
        // close the file streams
        dataBIG.close();
        dataNew.close();
        // extract the file
        zip.extract(fileZip, new File("."));
        // delete the temp zip file
        fileZip.delete();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }
    return true;
}
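A hedged aside on the copy loop above: reading one byte at a time is slow because every read() goes to the underlying file. A buffered variant keeps getFilePointer() as the loop bound while moving data in chunks. This is a sketch, not part of BigZip; copyRange is a hypothetical helper and the 64 KB buffer size is an arbitrary choice.

import java.io.IOException;
import java.io.RandomAccessFile;

final class CopyRangeSketch {
    // copies the byte range [in.getFilePointer(), endPosition) from in to out in chunks
    static void copyRange(RandomAccessFile in, RandomAccessFile out, long endPosition) throws IOException {
        byte[] buffer = new byte[64 * 1024]; // arbitrary chunk size
        while (in.getFilePointer() < endPosition) {
            // never read past the end of the requested range
            int toRead = (int) Math.min(buffer.length, endPosition - in.getFilePointer());
            in.readFully(buffer, 0, toRead); // advances the pointer by toRead bytes
            out.write(buffer, 0, toRead);
        }
    }
}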
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Resubmit failed items.
 *
 * @param loader
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    queueItem = (item._redirectStatus != 0);
                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    queueItem = (item._crawlStatus != 0);
                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }
                if (queueItem) {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();
                    // and spill
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);
                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
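The loop above relies on a recurring idiom in these examples: iterate over fixed-size records until getFilePointer() reaches length(). A stripped-down sketch of that idiom, assuming a hypothetical file of back-to-back (long id, int value) records with no trailing partial record:

import java.io.IOException;
import java.io.RandomAccessFile;

public class RecordScanSketch {
    public static void scan(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "r")) {
            // each iteration consumes exactly one 12-byte record
            while (raf.getFilePointer() < raf.length()) {
                long recordStart = raf.getFilePointer(); // offset of this record
                long id = raf.readLong();
                int value = raf.readInt();
                System.out.println("record @" + recordStart + " id=" + id + " value=" + value);
            }
        }
    }
}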
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Queue uncrawled urls via the CrawlQueueLoader.
 *
 * @param loader
 */
public void queueUnCrawledItems(CrawlQueueLoader loader) throws IOException {
    _queueState = QueueState.QUEUEING;

    int metadataVersion = getMetadata().getVersion();

    synchronized (_metadata) {
        // reset metadata PERIOD
        int urlCount = _metadata.getUrlCount();
        _metadata.clear();
        _metadata.setUrlCount(urlCount);
    }

    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            long position = fixedDataReader.getFilePointer();
            //LOG.info("*** TRYING READ LOCK FOR OFFSET:" + position);
            while (true) {
                // get read lock on position ...
                try {
                    FileLock lock = fixedDataReader.getChannel().tryLock(position,
                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                    try {
                        //LOG.info("*** GOT READ LOCK FOR OFFSET:" + position);
                        item.deserialize(fixedDataReader);
                        break;
                    } finally {
                        lock.release();
                        //LOG.info("*** RELEASED READ LOCK FOR OFFSET:" + position);
                    }
                } catch (OverlappingFileLockException e) {
                    LOG.error("*** LOCK CONTENTION AT:" + position + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
            }

            // seek to string data
            stringDataReader.seek(item._stringsOffset);
            // and skip buffer length
            WritableUtils.readVInt(stringDataReader);
            // and read primary string
            String url = stringDataReader.readUTF();
            // setup fingerprint
            fingerprint.setDomainHash(item._domainHash);
            fingerprint.setUrlHash(item._urlFingerprint);

            // first, if it has not been crawled ever, crawl it no matter what ...
            boolean crawlItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

            // if it has been crawled ... check list metadata version ...
            if (!crawlItem && metadataVersion >= 1) {
                // ok this is a newer version of the list ...
                // check refresh time if specified ...
                int refreshIntervalInSeconds = DEFAULT_REFRESH_INTERVAL_IN_SECS;
                if (getMetadata().getRefreshInterval() != 0) {
                    refreshIntervalInSeconds = getMetadata().getRefreshInterval();
                }
                if (item._updateTimestamp > 0) {
                    long timeSinceLastCrawl = item._updateTimestamp;
                    // long arithmetic (1000L) avoids int overflow for large refresh intervals
                    if (System.currentTimeMillis() - timeSinceLastCrawl >= (refreshIntervalInSeconds * 1000L)) {
                        crawlItem = true;
                    }
                }
            }

            if (crawlItem) {
                loader.queueURL(fingerprint, url);
                synchronized (_metadata) {
                    // update queued item count
                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() + 1);
                }
            } else {
                updateMetadata(item, _metadata, 0);
            }
            // ok update subdomain stats
            updateSubDomainMetadataForItemDuringLoad(item, url, fingerprint, crawlItem);
        }

        flushCachedSubDomainMetadata();

        loader.flush();

        _queueState = QueueState.QUEUED;
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.ERROR;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptorTest.java
private int getVepAnnotationBatch(RandomAccessFile raf, int nVariantsToRead,
        Set<AnnotationComparisonObject> vepAnnotationSet) throws IOException {
    /**
     * Loads VEP annotation
     */
    String newLine;
    int nNonRegulatoryAnnotations = 0;
    int nReadVariants = 0;
    String previousChr = "";
    String previousPosition = "";
    String previousAlt = "";
    String alt;
    long filePointer = 0;

    if (nVariantsToRead > 0) {
        while (((newLine = raf.readLine()) != null) && nReadVariants <= nVariantsToRead) {
            String[] lineFields = newLine.split("\t");
            String[] coordinatesParts = lineFields[1].split(":");
            if (lineFields[2].equals("deletion")) {
                alt = "-";
            } else {
                alt = lineFields[2];
            }
            if (!previousChr.equals(coordinatesParts[0]) || !previousPosition.equals(coordinatesParts[1])
                    || !previousAlt.equals(alt)) {
                nReadVariants++;
            }
            if (nReadVariants <= nVariantsToRead) {
                for (String SOname : lineFields[6].split(",")) {
                    if (SOname.equals("nc_transcript_variant")) {
                        SOname = "non_coding_transcript_variant";
                    }
                    if (!SOname.equals("regulatory_region_variant")) {
                        nNonRegulatoryAnnotations++;
                    }
                    vepAnnotationSet.add(new AnnotationComparisonObject(coordinatesParts[0],
                            coordinatesParts[1], alt, lineFields[3], lineFields[4], SOname));
                }
                previousChr = coordinatesParts[0];
                previousPosition = coordinatesParts[1];
                previousAlt = alt;
                filePointer = raf.getFilePointer();
            }
        }
        raf.seek(filePointer);
    }
    return nNonRegulatoryAnnotations;
}
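One detail worth calling out above: the loop necessarily reads one line past the requested batch, so the method saves getFilePointer() after each accepted line and seeks back to that saved offset before returning, effectively un-reading the overshoot so the next call resumes at the right line. A minimal hedged sketch of that push-back idiom (the prefix predicate is invented for illustration):

import java.io.IOException;
import java.io.RandomAccessFile;

public class LinePushBackSketch {
    /** Consumes consecutive lines starting with prefix; leaves the pointer
     *  at the beginning of the first line that does not match. */
    public static int readWhilePrefixed(RandomAccessFile raf, String prefix) throws IOException {
        long lastAccepted = raf.getFilePointer();
        int matched = 0;
        String line;
        while ((line = raf.readLine()) != null) {
            if (!line.startsWith(prefix)) {
                break; // overshot: this line belongs to the next caller
            }
            matched++;
            lastAccepted = raf.getFilePointer(); // offset just past the accepted line
        }
        raf.seek(lastAccepted); // un-read the non-matching line
        return matched;
    }
}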
From source file:org.commoncrawl.service.listcrawler.HDFSFlusherThread.java
private long generateSequenceFileAndIndex(int itemFlushLimit, RandomAccessFile sourceLogFile, long startPos,
        long endPos, byte[] syncBytes, SequenceFile.Writer writer, DataOutput indexStreamOut,
        ArrayList<FingerprintAndOffsetTuple> tupleListOut) throws IOException {

    byte[] syncCheck = new byte[syncBytes.length];

    // and create a list to hold fingerprint / offset information
    Vector<FingerprintAndOffsetTuple> fpOffsetList = new Vector<FingerprintAndOffsetTuple>();

    long currentPos = startPos;

    LOG.info("Flushing Entries Starting up to offset:" + endPos);
    CacheItemHeader itemHeader = new CacheItemHeader();
    int itemsProcessed = 0;

    boolean ignoreFlushLimit = false;

    // start read
    while (currentPos < endPos) {
        if ((endPos - currentPos) < LocalLogFileHeader.SYNC_BYTES_SIZE)
            break;

        // seek to current position ...
        sourceLogFile.seek(currentPos);

        boolean headerLoadFailed = false;

        try {
            // read the item header ... assuming things are good so far ...
            itemHeader.readHeader(sourceLogFile);
        } catch (IOException e) {
            CacheManager.LOG.error("### Item Header Load At Position:" + currentPos
                    + " Failed With Exception:" + CCStringUtils.stringifyException(e));
            headerLoadFailed = true;
        }

        if (headerLoadFailed) {
            CacheManager.LOG
                    .error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");
            currentPos += LocalLogFileHeader.SYNC_BYTES_SIZE;
        }

        // if header sync bytes don't match .. then seek to next sync position ...
        if (headerLoadFailed || !Arrays.equals(itemHeader._sync, syncBytes)) {
            CacheManager.LOG
                    .error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");

            // reseek to current pos
            sourceLogFile.seek(currentPos);
            // read in a sync.length buffer amount
            sourceLogFile.readFully(syncCheck);

            int syncLen = syncBytes.length;

            // start scan for next sync position ...
            for (int i = 0; sourceLogFile.getFilePointer() < endPos; i++) {
                int j = 0;
                for (; j < syncLen; j++) {
                    if (syncBytes[j] != syncCheck[(i + j) % syncLen])
                        break;
                }
                if (j == syncLen) {
                    // position before sync
                    sourceLogFile.seek(sourceLogFile.getFilePointer() - LocalLogFileHeader.SYNC_BYTES_SIZE);
                    break;
                }
                syncCheck[i % syncLen] = sourceLogFile.readByte();
            }
            // whatever happened, the file pointer is now at the current pos
            currentPos = sourceLogFile.getFilePointer();

            if (currentPos < endPos) {
                CacheManager.LOG.info("### Item Loader Found another sync point at:" + currentPos);
            } else {
                CacheManager.LOG.error("### No more sync points found!");
            }
        } else {
            CacheManager.LOG
                    .info("WritingItem with FP:" + itemHeader._fingerprint + " Pos Is:" + writer.getLength());

            // track offset information for index building purposes
            fpOffsetList.add(new FingerprintAndOffsetTuple(itemHeader._fingerprint, writer.getLength()));

            // read item data ...
            CacheItem cacheItem = new CacheItem();
            cacheItem.readFields(sourceLogFile);

            // now read content length
            int contentLength = sourceLogFile.readInt();

            // and if content present ... allocate buffer
            if (contentLength != 0) {
                // allocate content buffer
                byte[] contentBuffer = new byte[contentLength];
                // read it from disk
                sourceLogFile.readFully(contentBuffer);
                // and set content into cache item
                cacheItem.setContent(new Buffer(contentBuffer));
            }
            CacheManager.LOG.info("Adding to Sequence File Item with URL:" + cacheItem.getUrl());
            // write to sequence file ...
            writer.append(new Text(cacheItem.getUrl()), cacheItem);
            // now seek past data
            currentPos += CacheItemHeader.SIZE + itemHeader._dataLength + CacheManager.ITEM_RECORD_TRAILING_BYTES;
            // increment item count
            itemsProcessed++;
        }

        if (!ignoreFlushLimit && itemsProcessed >= itemFlushLimit) {
            // ok this gets tricky now ...
            // figure out how many bytes of data were required to get to flush limit
            long approxCheckpointSize = currentPos - startPos;
            // compute a threshold number
            long bytesThreshold = (long) (approxCheckpointSize * .70);
            // compute bytes remaining in checkpoint file ...
            long bytesRemaining = endPos - currentPos;

            // ok if bytes remaining are less than threshold number then go ahead and gobble
            // everything up in a single pass (to prevent a smaller subsequent index)
            if (bytesRemaining <= bytesThreshold) {
                // ignore the flush limit and keep on rolling to the end ...
                ignoreFlushLimit = true;
                LOG.warn("*****Bytes Remaining:" + bytesRemaining + " less than % of last whole chkpt size:"
                        + approxCheckpointSize + ". Bypassing Flush Limit");
            } else {
                LOG.info("Reached Flush Item Limit:" + itemsProcessed + " Breaking Out");
                break;
            }
        }
    }

    LOG.info("Writing Index");
    // ok now build the index file ...
    HDFSFileIndex.writeIndex(fpOffsetList, indexStreamOut);
    LOG.info("Done Writing Index. Total Items Written:" + fpOffsetList.size());
    // copy offset list into tuple list
    tupleListOut.addAll(fpOffsetList);

    return currentPos;
}
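The corruption-recovery branch above resynchronizes on a sync marker using a rolling modular buffer, which is efficient but hard to read. A simpler (byte-at-a-time, so slower) hedged sketch of the same idea, reporting the marker's offset via getFilePointer():

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Arrays;

public class SyncScanSketch {
    /** Returns the offset of the next occurrence of marker at or after the current
     *  pointer, or -1 if no full marker starts before endPos. */
    public static long findNextSync(RandomAccessFile raf, byte[] marker, long endPos) throws IOException {
        byte[] window = new byte[marker.length];
        while (raf.getFilePointer() + marker.length <= endPos) {
            long candidate = raf.getFilePointer(); // where this window starts
            raf.readFully(window);
            if (Arrays.equals(window, marker)) {
                return candidate; // pointer is now positioned just past the marker
            }
            raf.seek(candidate + 1); // slide the window forward by one byte
        }
        return -1;
    }
}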
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void resetSubDomainCounts() throws IOException {
    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");

    if (_subDomainMetadataFile.exists()) {
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            for (int i = 0; i < itemCount; ++i) {
                long originalPos = file.getFilePointer();
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                // ok reset everything except hashes and first/last url pointers
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();

                // reset
                newMetadata.clear();
                // restore
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);

                // serialize it ...
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());

                // write it back to disk
                file.seek(originalPos);
                // and rewrite it ...
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE RESETTING SUBDOMAIN METADATA QUEUE COUNTS");
    }
}
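The rewrite loop above is an instance of the in-place update pattern: capture getFilePointer() before reading a fixed-size record, modify the record, seek back to the captured offset, and overwrite. A self-contained hedged sketch with an invented record layout (a long id followed by an int counter):

import java.io.IOException;
import java.io.RandomAccessFile;

public class InPlaceUpdateSketch {
    /** Zeroes the counter field of every 12-byte (long id, int counter) record. */
    public static void resetCounters(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "rw")) {
            while (raf.getFilePointer() + 12 <= raf.length()) {
                long recordStart = raf.getFilePointer(); // capture before reading
                long id = raf.readLong();
                raf.readInt();                 // consume the old counter
                raf.seek(recordStart);         // rewind to the start of the record
                raf.writeLong(id);             // rewrite the id unchanged
                raf.writeInt(0);               // reset the counter; pointer is now at the next record
            }
        }
    }
}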
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ... ");
    if (_subDomainMetadataFile.exists()) {
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS. LOADING SUBDOMAIN DATA FROM DISK.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {
                long originalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) originalPos);
            }

            // write lookup table
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);

            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");

        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");

        try {
            // ok rebuild top level metadata as well
            _metadata.clear();

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();

            int processedCount = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                long position = fixedDataReader.getFilePointer();

                // store offset in item
                item._fileOffset = position;

                // load from disk
                item.deserialize(fixedDataReader);

                try {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();

                    // get metadata object for subdomain
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);

                    // increment url count
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);

                    // increment top level metadata count
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);

                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);

                    // update sub-domain metadata object from item data
                    updateMetadata(item, subDomainMetadata, 0);

                    ++processedCount;
                } catch (IOException e) {
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }

                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }

            // ok commit top level metadata to disk as well
            writeMetadataToDisk();
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA. WRITING TO DISK");
        // write metadata to disk
        writeInitialSubDomainMetadataToDisk();
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA. WRITE COMPLETE");
    }
}
From source file:org.opencb.cellbase.lib.db.VariantAnnotationCalculatorTest.java
private int getVepAnnotationBatch(RandomAccessFile raf, int nVariantsToRead,
        Set<AnnotationComparisonObject> vepAnnotationSet) throws IOException {
    /**
     * Loads VEP annotation
     */
    String newLine;
    int nNonRegulatoryAnnotations = 0;
    int nReadVariants = 0;
    String previousChr = "";
    String previousPosition = "";
    String previousAlt = "";
    String alt;
    long filePointer = 0;

    if (nVariantsToRead > 0) {
        while (((newLine = raf.readLine()) != null) && nReadVariants <= nVariantsToRead) {
            String[] lineFields = newLine.split("\t");
            String[] coordinatesParts = lineFields[1].split(":");
            if (lineFields[2].equals("deletion")) {
                alt = "-";
            } else {
                alt = lineFields[2];
            }
            // TODO: Remove this if as refactoring implements consequence types for other variant types
            // if(!alt.equals("-") && coordinatesParts[1].split("-").length==1) {
            if (!previousChr.equals(coordinatesParts[0]) || !previousPosition.equals(coordinatesParts[1])
                    || !previousAlt.equals(alt)) {
                nReadVariants++;
            }
            if (nReadVariants <= nVariantsToRead) {
                for (String SOname : lineFields[6].split(",")) {
                    if (SOname.equals("nc_transcript_variant")) {
                        SOname = "non_coding_transcript_variant";
                    }
                    if (!SOname.equals("regulatory_region_variant")) {
                        nNonRegulatoryAnnotations++;
                    }
                    vepAnnotationSet.add(new AnnotationComparisonObject(coordinatesParts[0],
                            coordinatesParts[1], alt, lineFields[3], lineFields[4], SOname));
                }
                previousChr = coordinatesParts[0];
                previousPosition = coordinatesParts[1];
                previousAlt = alt;
                filePointer = raf.getFilePointer();
            }
            // }
        }
        raf.seek(filePointer);
    }
    return nNonRegulatoryAnnotations;
}
From source file:io.minio.MinioClient.java
/**
 * Skips data of up to given length in given input stream.
 *
 * @param inputStream Input stream which is an instance of {@link RandomAccessFile} or {@link BufferedInputStream}.
 * @param n Length of bytes to skip.
 */
private void skipStream(Object inputStream, long n) throws IOException, InsufficientDataException {
    RandomAccessFile file = null;
    BufferedInputStream stream = null;
    if (inputStream instanceof RandomAccessFile) {
        file = (RandomAccessFile) inputStream;
    } else if (inputStream instanceof BufferedInputStream) {
        stream = (BufferedInputStream) inputStream;
    } else {
        throw new IllegalArgumentException("unsupported input stream object");
    }

    if (file != null) {
        // a RandomAccessFile can skip in a single hop: advance the pointer by n
        file.seek(file.getFilePointer() + n);
        return;
    }

    long bytesSkipped;
    long totalBytesSkipped = 0;

    // loop on "> 0" rather than ">= 0": skip() returns 0 at end of stream,
    // and looping on ">= 0" would spin forever instead of falling through
    // to the exception below
    while ((bytesSkipped = stream.skip(n - totalBytesSkipped)) > 0) {
        totalBytesSkipped += bytesSkipped;
        if (totalBytesSkipped == n) {
            return;
        }
    }

    throw new InsufficientDataException(
            "Insufficient data. bytes skipped " + totalBytesSkipped + " expected " + n);
}
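A note on the design choice behind the two branches above: for a RandomAccessFile, skipping is a single pointer move, so seek(getFilePointer() + n) costs the same for any n (and RandomAccessFile permits seeking past the current end of file). InputStream.skip(), by contrast, is only best-effort and may skip fewer bytes than requested, which is why the stream branch must loop and accumulate totalBytesSkipped until it reaches n.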
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) {
    synchronized (_metadata) {
        ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>();

        try {
            synchronized (_subDomainMetadataFile) {
                RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
                DataInputBuffer inputBuffer = new DataInputBuffer();
                byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

                try {
                    // skip version
                    file.read();
                    // read item count
                    int itemCount = file.readInt();

                    int i = offset;
                    int end = Math.min(i + count, itemCount);

                    LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

                    if (i < itemCount) {
                        // skip the 5-byte header (1 version byte + 4-byte item count)
                        // plus all fixed-size records before the requested offset
                        file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset));

                        CrawlListMetadata newMetadata = new CrawlListMetadata();

                        for (; i < end; ++i) {
                            long originalPos = file.getFilePointer();
                            file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                            inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                            newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                            itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata));
                        }
                    }
                } finally {
                    file.close();
                }
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");

        return itemsOut;
    }
}