List of usage examples for java.io RandomAccessFile getChannel
public final FileChannel getChannel()
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * queue uncrawled urls via the CrawlQueueLoader
 *
 * @param loader
 */
public void queueUnCrawledItems(CrawlQueueLoader loader) throws IOException {
    _queueState = QueueState.QUEUEING;

    int metadataVersion = getMetadata().getVersion();

    synchronized (_metadata) {
        // reset metadata PERIOD
        int urlCount = _metadata.getUrlCount();
        _metadata.clear();
        _metadata.setUrlCount(urlCount);
    }

    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");

    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            long position = fixedDataReader.getFilePointer();

            //LOG.info("*** TRYING READ LOCK FOR OFFSET:" + position);
            while (true) {
                // get read lock on position ...
                try {
                    FileLock lock = fixedDataReader.getChannel().tryLock(position,
                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                    try {
                        //LOG.info("*** GOT READ LOCK FOR OFFSET:" + position);
                        item.deserialize(fixedDataReader);
                        break;
                    } finally {
                        lock.release();
                        //LOG.info("*** RELEASED READ LOCK FOR OFFSET:" + position);
                    }
                } catch (OverlappingFileLockException e) {
                    LOG.error("*** LOCK CONTENTION AT:" + position + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
            }

            // seek to string data
            stringDataReader.seek(item._stringsOffset);
            // and skip buffer length
            WritableUtils.readVInt(stringDataReader);
            // and read primary string
            String url = stringDataReader.readUTF();

            // setup fingerprint
            fingerprint.setDomainHash(item._domainHash);
            fingerprint.setUrlHash(item._urlFingerprint);

            // first, if it has not been crawled ever, crawl it no matter what ...
            boolean crawlItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

            // if it has been crawled ... check list metadata version ...
            if (!crawlItem && metadataVersion >= 1) {
                // ok this is a newer version of the list ...
                // check refresh time if specified ...
                int refreshIntervalInSeconds = DEFAULT_REFRESH_INTERVAL_IN_SECS;
                if (getMetadata().getRefreshInterval() != 0) {
                    refreshIntervalInSeconds = getMetadata().getRefreshInterval();
                }
                if (item._updateTimestamp > 0) {
                    long timeSinceLastCrawl = item._updateTimestamp;
                    if (System.currentTimeMillis() - timeSinceLastCrawl >= (refreshIntervalInSeconds * 1000)) {
                        crawlItem = true;
                    }
                }
            }

            if (crawlItem) {
                loader.queueURL(fingerprint, url);
                synchronized (_metadata) {
                    // update queued item count
                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() + 1);
                }
            } else {
                updateMetadata(item, _metadata, 0);
            }

            // ok update subdomain stats
            updateSubDomainMetadataForItemDuringLoad(item, url, fingerprint, crawlItem);
        }

        flushCachedSubDomainMetadata();

        loader.flush();

        _queueState = QueueState.QUEUED;
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.ERROR;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
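The CrawlList example above pairs sequential reads from a RandomAccessFile with per-record region locking obtained through getChannel().tryLock(position, size, shared). Below is a minimal, self-contained sketch of that same pattern; the file name records.dat, the RegionLockExample class, and the fixed 16-byte record layout are illustrative assumptions, not part of the CrawlList code.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;

public class RegionLockExample {
    // Hypothetical fixed record size: two 8-byte longs per record.
    private static final int RECORD_SIZE = 16;

    public static void main(String[] args) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile("records.dat", "rw")) {
            FileChannel channel = raf.getChannel();

            while (raf.getFilePointer() != raf.length()) {
                long position = raf.getFilePointer();

                // Lock only the byte range of the next record, not the whole file.
                FileLock lock = null;
                try {
                    lock = channel.tryLock(position, RECORD_SIZE, false);
                } catch (OverlappingFileLockException e) {
                    // an overlapping lock is held by another thread in this JVM
                }

                if (lock == null) {
                    // region unavailable (held by another process or this JVM); skip it
                    raf.seek(position + RECORD_SIZE);
                    continue;
                }

                try {
                    // read the record while the exclusive region lock is held
                    long firstField = raf.readLong();
                    long secondField = raf.readLong();
                    System.out.println(position + ": " + firstField + ", " + secondField);
                } finally {
                    lock.release();
                }
            }
        }
    }
}

Locking only a record's byte range rather than the whole file lets concurrent readers and writers work on different records at the same time. Note that tryLock returns null when another process already holds an overlapping lock, while an overlapping lock held by the same JVM surfaces as OverlappingFileLockException, which is why the CrawlList code above catches that exception and retries.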