org.commoncrawl.service.queryserver.query.URLLinksQuery.java Source code

Java tutorial

Introduction

Here is the source code for org.commoncrawl.service.queryserver.query.URLLinksQuery.java

Source

/**
 * Copyright 2008 - CommonCrawl Foundation
 * 
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 **/

package org.commoncrawl.service.queryserver.query;

import java.io.File;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.commoncrawl.async.EventLoop;
import org.commoncrawl.protocol.CrawlDatumAndMetadata;
import org.commoncrawl.protocol.SubDomainMetadata;
import org.commoncrawl.protocol.URLFPV2;
import org.commoncrawl.service.queryserver.ClientQueryInfo;
import org.commoncrawl.service.queryserver.InlinkingDomainInfo;
import org.commoncrawl.service.queryserver.ShardIndexHostNameTuple;
import org.commoncrawl.service.queryserver.URLLinkDetailQueryInfo;
import org.commoncrawl.service.queryserver.index.DatabaseIndexV2;
import org.commoncrawl.service.queryserver.index.DatabaseIndexV2.SlaveDatabaseIndex;
import org.commoncrawl.service.queryserver.index.DatabaseIndexV2.MasterDatabaseIndex.MetadataOut;
import org.commoncrawl.util.CCStringUtils;
import org.commoncrawl.util.CompressedURLFPListV2;
import org.commoncrawl.util.FlexBuffer;

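/**
 * Query over the link data of a single target URL fingerprint. Depending on the
 * query type it serves the flat link list (LINKS_QUERY / INVERSE_QUERY), the
 * inlinking domains (INVERSE_BY_DOMAIN_QUERY), or the urls of one inlinking
 * domain (INVERSE_BY_DOMAIN_DETAIL_QUERY).
 * 
 * @author rana
 *
 */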
public class URLLinksQuery extends Query<URLLinkDetailQueryInfo, Writable, Writable> {

    /** Commons-logging logger shared by all instances of this query class. */
    private static final Log LOG = LogFactory.getLog(URLLinksQuery.class);

    /**
     * Creates an instance without attaching any query data.
     */
    public URLLinksQuery() {
        super();
    }

    /**
     * Creates an instance and attaches the given query data to it.
     *
     * @param queryInfo the link query description handed to {@code setQueryData}
     */
    public URLLinksQuery(URLLinkDetailQueryInfo queryInfo) {
        super();
        setQueryData(queryInfo);
    }

    /**
     * Builds the canonical id of this query from its type and target fingerprint.
     *
     * <p>The id has the form {@code <prefix><domainHash>:<urlHash>}, passed through
     * {@code encodePatternAsFilename}. The prefix is {@code ULQ:} for link queries,
     * {@code UILQ:} for inverse queries and {@code UILBDQ:} for both inverse-by-domain
     * variants (which therefore share one id).
     *
     * @return the encoded id, or an empty string when the query type is not one of the above
     */
    @Override
    public String getCanonicalId() {
        final URLLinkDetailQueryInfo queryInfo = getQueryData();

        final String typePrefix;
        if (queryInfo.getQueryType() == URLLinkDetailQueryInfo.QueryType.LINKS_QUERY) {
            typePrefix = "ULQ:";
        } else if (queryInfo.getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_QUERY) {
            typePrefix = "UILQ:";
        } else if (queryInfo.getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_BY_DOMAIN_QUERY
                || queryInfo.getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_BY_DOMAIN_DETAIL_QUERY) {
            typePrefix = "UILBDQ:";
        } else {
            // unspecified link query type: this method cannot throw IOException, so
            // an empty id is returned instead of failing
            return "";
        }

        return encodePatternAsFilename(typePrefix + queryInfo.getTargetURLFP().getDomainHash() + ":"
                + queryInfo.getTargetURLFP().getUrlHash());
    }

    /**
     * Shared number formatter configured below to emit at least five integer digits
     * with digit grouping disabled. It is not referenced by the visible methods of
     * this class.
     * NOTE(review): java.text.NumberFormat instances are generally not synchronized;
     * confirm any concurrent users guard access.
     */
    protected static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
    static {
        // zero-pad to a minimum of five integer digits
        NUMBER_FORMAT.setMinimumIntegerDigits(5);
        // no grouping separators in the output
        NUMBER_FORMAT.setGroupingUsed(false);
    }

    /**
     * Reports whether a locally cached result file already exists for the request.
     *
     * @param fileSystem       not used by this implementation
     * @param conf             configuration used to obtain the local file system
     * @param theClientRequest request whose local results path prefix is probed
     * @return {@code true} when the {@code DATA} file exists under the request's local
     *         results path prefix
     * @throws IOException if the local file system cannot be obtained or queried
     */
    @Override
    public boolean cachedResultsAvailable(FileSystem fileSystem, Configuration conf, QueryRequest theClientRequest)
            throws IOException {
        final FileSystem localFS = FileSystem.getLocal(conf);
        final Path cachedDataPath = new Path(getLocalQueryResultsPathPrefix(theClientRequest) + "DATA");
        // the presence of the DATA file is the only signal checked
        return localFS.exists(cachedDataPath);
    }

    /**
     * Turns the slave's result file for the target shard into the local cache files
     * for this request.
     *
     * <p>If the slave's part file does not exist nothing is done and zero is returned.
     * Otherwise any previous local {@code DATA} / {@code DATA.index} files are deleted
     * and rebuilt: link and inverse queries produce the flat fingerprint list, the
     * inverse-by-domain variants produce the detail file plus its domain index.
     *
     * @param remoteFileSystem file system holding the slave's result file
     * @param conf             configuration used to obtain the local file system
     * @param index            master index, used to resolve domain metadata
     * @param eventLoop        not used by this implementation
     * @param tempFileDir      not used by this implementation
     * @param requestObject    request whose local results path prefix is written to
     * @return number of fingerprint records written locally (zero if nothing was processed)
     * @throws IOException on any file system failure
     */
    @SuppressWarnings("unchecked")
    @Override
    protected long executeLocal(FileSystem remoteFileSystem, Configuration conf,
            DatabaseIndexV2.MasterDatabaseIndex index, EventLoop eventLoop, File tempFileDir,
            QueryRequest<URLLinkDetailQueryInfo, Writable, Writable> requestObject) throws IOException {

        FileSystem genericLocalFS = FileSystem.getLocal(conf);
        int shardId = getQueryData().getLinkDBFileNo();

        LOG.info("remoteDispathc Complete Called");
        Path slaveResultPath = new Path(getHDFSQueryResultsPath(), getPartNameForSlave(shardId));

        // nothing to consolidate when the slave produced no file
        if (!remoteFileSystem.exists(slaveResultPath)) {
            return 0L;
        }

        LocalFileSystem typedLocalFS = FileSystem.getLocal(conf);

        Path dataPath = new Path(getLocalQueryResultsPathPrefix(requestObject) + "DATA");
        Path dataIndexPath = new Path(getLocalQueryResultsPathPrefix(requestObject) + "DATA.index");

        // drop stale cache files before regenerating them
        typedLocalFS.delete(dataPath);
        typedLocalFS.delete(dataIndexPath);

        if (getQueryData().getQueryType() == URLLinkDetailQueryInfo.QueryType.LINKS_QUERY
                || getQueryData().getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_QUERY) {
            return runOutlinkLocalQuery(remoteFileSystem, slaveResultPath, typedLocalFS, dataPath);
        }
        if (getQueryData().getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_BY_DOMAIN_QUERY
                || getQueryData()
                        .getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_BY_DOMAIN_DETAIL_QUERY) {
            return runInlinksLocalQuery(index, remoteFileSystem, slaveResultPath, genericLocalFS, dataIndexPath,
                    dataPath);
        }
        // unrecognized query type: the files stay deleted and no records are reported
        return 0L;
    }

    /**
     * Expands a compressed fingerprint list into a flat file of fixed-size records.
     *
     * <p>Every fingerprint read from {@code outlinksInputPath} is written to
     * {@code outlinksOutputPath} as two longs: the domain hash followed by the url hash.
     * Any existing output file is deleted first.
     *
     * @param inputFileSystem    file system holding the compressed list
     * @param outlinksInputPath  path of the compressed list
     * @param outputFileSystem   file system receiving the flat file
     * @param outlinksOutputPath path of the flat file
     * @return number of fingerprints written
     * @throws IOException on any read or write failure
     */
    private long runOutlinkLocalQuery(FileSystem inputFileSystem, Path outlinksInputPath,
            FileSystem outputFileSystem, Path outlinksOutputPath) throws IOException {

        outputFileSystem.delete(outlinksOutputPath);

        long written = 0L;
        final FSDataInputStream compressedIn = inputFileSystem.open(outlinksInputPath);
        try {
            final FSDataOutputStream flatOut = outputFileSystem.create(outlinksOutputPath);
            try {
                final CompressedURLFPListV2.Reader fpReader = new CompressedURLFPListV2.Reader(compressedIn);
                while (fpReader.hasNext()) {
                    final URLFPV2 fp = fpReader.next();
                    // record layout: domain hash, then url hash
                    flatOut.writeLong(fp.getDomainHash());
                    flatOut.writeLong(fp.getUrlHash());
                    ++written;
                }
            } finally {
                flatOut.close();
            }
        } finally {
            compressedIn.close();
        }
        return written;
    }

    /**
     * Plain mutable holder for per-domain bookkeeping. It is not referenced by the
     * visible methods of this class.
     */
    private static class DomainInfo {

        /** Domain identifier; overwritten by the constructor argument. */
        public long domainId = -1;
        /** Number of urls counted for the domain; starts at zero. */
        public int urlCount = 0;
        /** Domain name; {@code null} until assigned. */
        public String domainName = null;
        /** Data position associated with the domain; {@code -1} until assigned. */
        public long dataPos = -1;

        /**
         * @param domainId identifier stored in {@link #domainId}
         */
        DomainInfo(long domainId) {
            this.domainId = domainId;
        }
    }

    /**
     * Converts a compressed inlink fingerprint list into two local files: a flat detail
     * file of fixed-size fingerprint records and a per-domain index over that file.
     *
     * <p>Each fingerprint read from {@code inlinksInputPath} is appended to the detail
     * file as two longs (domain hash, url hash). A new {@link InlinkingDomainInfo} is
     * started whenever the domain hash differs from the previous record's; it records
     * the detail-file position where that run begins and counts the urls in the run.
     * Domain names are then resolved through {@code index}, the list is sorted with
     * {@link Collections#sort(java.util.List)} (by domain name, per the original
     * author's comment), and the index file is written as: an int record count, one int
     * offset per domain (relative to the start of the record section), then the
     * serialized domain records.
     *
     * <p>Each stream is closed in its own {@code finally}, so a failure while creating
     * or closing one stream cannot leak the others.
     *
     * @param index                   master index used to resolve domain ids to names
     * @param inputFileSystem         file system holding the compressed list
     * @param inlinksInputPath        path of the compressed list
     * @param outputFileSystem        file system receiving both output files
     * @param inlinksDomainIndexPath  path of the domain index file
     * @param inlinksDetailOutputPath path of the flat detail file
     * @return number of fingerprint records written to the detail file
     * @throws IOException on any read or write failure
     */
    private long runInlinksLocalQuery(DatabaseIndexV2.MasterDatabaseIndex index, FileSystem inputFileSystem,
            Path inlinksInputPath, FileSystem outputFileSystem, Path inlinksDomainIndexPath,
            Path inlinksDetailOutputPath) throws IOException {

        long recordCount = 0L;

        outputFileSystem.delete(inlinksDomainIndexPath);
        outputFileSystem.delete(inlinksDetailOutputPath);

        FSDataInputStream remoteInputStream = inputFileSystem.open(inlinksInputPath);

        try {
            FSDataOutputStream indexOutputStream = outputFileSystem.create(inlinksDomainIndexPath);

            try {
                FSDataOutputStream detailOutputStream = outputFileSystem.create(inlinksDetailOutputPath);

                try {
                    ArrayList<InlinkingDomainInfo> domainList = new ArrayList<InlinkingDomainInfo>();

                    LOG.info("Writing Detail Stream to:" + inlinksDetailOutputPath);
                    CompressedURLFPListV2.Reader reader = new CompressedURLFPListV2.Reader(remoteInputStream);

                    InlinkingDomainInfo lastDomain = null;

                    while (reader.hasNext()) {

                        // read the next fingerprint
                        URLFPV2 fingerprint = reader.next();
                        // and first see if we have a domain transition
                        if (lastDomain == null || lastDomain.getDomainId() != fingerprint.getDomainHash()) {
                            // remember the domain
                            lastDomain = new InlinkingDomainInfo();
                            lastDomain.setDomainId(fingerprint.getDomainHash());
                            // add it to the list
                            domainList.add(lastDomain);
                            // record where this domain's run starts in the detail file
                            lastDomain.setUrlDataPos(detailOutputStream.getPos());
                        }
                        // increment url count for the domain
                        lastDomain.setUrlCount(lastDomain.getUrlCount() + 1);

                        detailOutputStream.writeLong(fingerprint.getDomainHash());
                        detailOutputStream.writeLong(fingerprint.getUrlHash());

                        recordCount++;
                    }

                    LOG.info("Retrieving Domain Metadata for :" + domainList.size() + " Domain Records");
                    // ok, now resolve domain names
                    for (InlinkingDomainInfo domain : domainList) {
                        SubDomainMetadata metadata = index.queryDomainMetadataGivenDomainId(domain.getDomainId());
                        if (metadata == null) {
                            // NOTE(review): the name is left unset for unresolved ids; this is
                            // presumably what surfaces as an empty name when the index is read
                            // back -- confirm against InlinkingDomainInfo's defaults.
                            LOG.error("*** Failed to Resolve DomainId:" + domain.getDomainId());
                        } else {
                            if (metadata.getDomainText().length() == 0) {
                                LOG.error("*** Metadata for Domain Id:" + domain.getDomainId()
                                        + " contained NULL Name Value.");
                                domain.setDomainName("_ERROR:BAD RECORD");
                            } else {
                                domain.setDomainName(metadata.getDomainText());
                            }
                        }
                    }

                    LOG.info("Sorting Domain List of Size:" + domainList.size());
                    // natural ordering of InlinkingDomainInfo
                    Collections.sort(domainList);

                    LOG.info("Building In Memory Index");

                    // header: record count followed by one offset per record;
                    // data: the serialized records themselves
                    DataOutputBuffer indexHeaderBuffer = new DataOutputBuffer();
                    DataOutputBuffer indexDataBuffer = new DataOutputBuffer();

                    LOG.info("***Writing Domain List Size:" + domainList.size());
                    indexHeaderBuffer.writeInt(domainList.size());

                    for (InlinkingDomainInfo domain : domainList) {
                        // offset of this record relative to the start of the data section
                        indexHeaderBuffer.writeInt(indexDataBuffer.getLength());
                        domain.write(indexDataBuffer);
                    }

                    LOG.info("Writing Index to:" + inlinksDomainIndexPath + " IndexHeaderLength:"
                            + indexHeaderBuffer.getLength() + " IndexDataLength:" + indexDataBuffer.getLength());
                    // ok now flush both buffers to disk
                    indexOutputStream.write(indexHeaderBuffer.getData(), 0, indexHeaderBuffer.getLength());
                    indexOutputStream.write(indexDataBuffer.getData(), 0, indexDataBuffer.getLength());

                    indexOutputStream.flush();
                    detailOutputStream.flush();
                } finally {
                    // closed independently of the index stream so neither can leak
                    detailOutputStream.close();
                }
            } finally {
                indexOutputStream.close();
            }
        } finally {
            remoteInputStream.close();
        }
        return recordCount;
    }

    /**
     * Reads one page of inlinking-domain records from the domain index file.
     *
     * <p>The index file starts with an int record count, followed by one int offset per
     * record, followed by the serialized {@link InlinkingDomainInfo} records; each
     * offset is relative to the end of the offset table. For ascending order the page
     * covers records {@code [pageNumber * pageSize, +pageSize)}; otherwise it is
     * counted back from the end of the list. When {@code sortOrder} is
     * {@code DESCENDING} the records are inserted at the front of the result list, so
     * the page comes out in reverse index order.
     *
     * <p>Page arithmetic is done in {@code long} so large page numbers cannot overflow,
     * and a record count that is negative or too large for the offset table to be held
     * in memory is rejected as a corrupt index.
     *
     * @param masterIndex     not used by this method
     * @param indexFileSystem file system holding the index file
     * @param indexPath       path of the domain index file
     * @param detailPath      not used by this method
     * @param sortOrder       one of the {@code ClientQueryInfo.SortOrder} constants
     * @param pageNumber      zero-based page to read
     * @param pageSize        number of records per page
     * @param resultOut       result object; cleared and then filled with the page
     * @throws IOException on read failure or when the record count is invalid
     */
    private static void readPaginatedInlinkingDomainInfo(final DatabaseIndexV2.MasterDatabaseIndex masterIndex,
            FileSystem indexFileSystem, Path indexPath, Path detailPath, int sortOrder, int pageNumber,
            int pageSize, QueryResult<Writable, Writable> resultOut) throws IOException {

        long startPos = 0;
        long endPos = 0;

        FSDataInputStream indexStream = indexFileSystem.open(indexPath);

        try {

            // read in the total record count ...
            int totalRecordCount = indexStream.readInt();

            // four bytes per offset entry; computed in long so a corrupt count cannot
            // wrap around to a small or negative array size
            long indexHeaderLength = (long) totalRecordCount * 4;
            if (totalRecordCount < 0 || indexHeaderLength > Integer.MAX_VALUE) {
                throw new IOException("Invalid Record Count:" + totalRecordCount + " in Index:" + indexPath);
            }

            LOG.info("***RecordCount:" + totalRecordCount + " Allocating Buffer Of:" + indexHeaderLength
                    + " bytes. FileLength:" + indexFileSystem.getFileStatus(indexPath).getLen());
            // read in index header data upfront
            byte indexHeaderData[] = new byte[(int) indexHeaderLength];
            indexStream.readFully(indexHeaderData);
            // the record section starts right after the offset table
            long detailStartPos = indexStream.getPos();
            // initialize index header reader stream
            DataInputBuffer indexHeaderStream = new DataInputBuffer();
            indexHeaderStream.reset(indexHeaderData, 0, indexHeaderData.length);

            resultOut.getResults().clear();
            resultOut.setPageNumber(pageNumber);
            resultOut.setTotalRecordCount(totalRecordCount);

            if (sortOrder == ClientQueryInfo.SortOrder.ASCENDING) {
                startPos = (long) pageNumber * pageSize;
                endPos = Math.min(startPos + pageSize, totalRecordCount);
            } else {
                // page counted back from the end; the last page may be partial, in
                // which case the start is clamped to zero while the end stays put
                startPos = totalRecordCount - ((long) pageNumber + 1) * pageSize;
                endPos = startPos + pageSize;
                startPos = Math.max(0, startPos);
            }

            if (startPos < totalRecordCount) {

                // position on the first offset entry of the page
                indexHeaderStream.skip(startPos * 4);

                for (long i = startPos; i < endPos; ++i) {

                    // read data offset ...
                    int domainDataPos = indexHeaderStream.readInt();
                    // seek to it
                    indexStream.seek(detailStartPos + domainDataPos);
                    // read the detail data
                    InlinkingDomainInfo domainInfo = new InlinkingDomainInfo();
                    domainInfo.readFields(indexStream);
                    // ok extract name
                    String domainName = domainInfo.getDomainName();
                    if (domainName == null || domainName.length() == 0) {
                        //TODO: NEED TO TRACK THIS DOWN
                        domainName = "<<OOPS-NULL>>";
                    }
                    Text key = new Text(domainName);
                    // the name travels as the record key, so mark the field clean on the value
                    domainInfo.setFieldClean(InlinkingDomainInfo.Field_DOMAINNAME);

                    if (sortOrder == ClientQueryInfo.SortOrder.DESCENDING) {
                        resultOut.getResults().add(0, new QueryResultRecord<Writable, Writable>(key, domainInfo));
                    } else {
                        resultOut.getResults().add(new QueryResultRecord<Writable, Writable>(key, domainInfo));
                    }
                }
            }
        } finally {
            indexStream.close();
        }
    }

    /**
     * Reads one page of url records belonging to a single inlinking domain from the
     * flat detail file and resolves each fingerprint through the master index.
     *
     * <p>The domain's records are the {@code srcDomainInfo.getUrlCount()} fixed-size
     * fingerprint records starting at byte {@code srcDomainInfo.getUrlDataPos()}. For
     * ascending order the page covers records {@code [pageNumber * pageSize, +pageSize)}
     * of that run; otherwise it is counted back from the end of the run. When
     * {@code sortOrder} is {@code DESCENDING} the records are inserted at the front of
     * the result list.
     *
     * <p>The stream is positioned with {@code seek()}, matching
     * {@code readPaginatedResults}: {@code InputStream.skip()} is allowed to skip fewer
     * bytes than requested, which would silently misalign every record read after it.
     * Page arithmetic is done in {@code long} to avoid int overflow.
     *
     * @param masterIndex     index used to look up url and metadata per fingerprint
     * @param indexFileSystem file system holding the detail file
     * @param detailDataPath  path of the flat detail file
     * @param srcDomainInfo   supplies the domain's url count and data position
     * @param sortOrder       one of the {@code ClientQueryInfo.SortOrder} constants
     * @param pageNumber      zero-based page to read
     * @param pageSize        number of records per page
     * @param resultOut       result object; cleared and then filled with the page
     * @throws IOException on read failure
     */
    private static void readPaginatedInlinkingDomainDetail(final DatabaseIndexV2.MasterDatabaseIndex masterIndex,
            FileSystem indexFileSystem, Path detailDataPath, InlinkingDomainInfo srcDomainInfo, int sortOrder,
            int pageNumber, int pageSize, QueryResult<Writable, Writable> resultOut) throws IOException {

        long startPos = 0;
        long endPos = 0;

        FSDataInputStream dataStream = indexFileSystem.open(detailDataPath);

        try {

            // the record count for this domain comes from the caller-supplied info
            int totalRecordCount = srcDomainInfo.getUrlCount();

            resultOut.getResults().clear();
            resultOut.setPageNumber(pageNumber);
            resultOut.setTotalRecordCount(totalRecordCount);

            if (sortOrder == ClientQueryInfo.SortOrder.ASCENDING) {
                startPos = (long) pageNumber * pageSize;
                endPos = Math.min(startPos + pageSize, totalRecordCount);
            } else {
                // page counted back from the end; clamp the start for a partial last page
                startPos = totalRecordCount - ((long) pageNumber + 1) * pageSize;
                endPos = startPos + pageSize;
                startPos = Math.max(0, startPos);
            }

            if (startPos < totalRecordCount) {

                // absolute positioning on the first record of the page
                dataStream.seek(srcDomainInfo.getUrlDataPos() + (startPos * FP_RECORD_SIZE));

                for (long i = startPos; i < endPos; ++i) {

                    URLFPV2 key = new URLFPV2();

                    key.setDomainHash(dataStream.readLong());
                    key.setUrlHash(dataStream.readLong());

                    // ok time to find this item in the master index ...
                    CrawlDatumAndMetadata metadataObject = new CrawlDatumAndMetadata();
                    MetadataOut metadataOut = masterIndex.queryMetadataAndURLGivenFP(key);

                    if (metadataOut == null) {
                        LOG.error("Failed to Retrieve URL and Metadata for Domain:" + key.getDomainHash() + " FP:"
                                + key.getUrlHash());
                        // placeholder url so the record still shows up in the page
                        metadataObject.setUrl("NULL-DH(" + key.getDomainHash() + ")-FP(" + key.getUrlHash() + ")");
                    } else {
                        metadataObject.setUrl(metadataOut.url.toString());
                        metadataObject.setStatus(metadataOut.fetchStatus);
                        if (metadataOut.lastFetchTime > 0) {
                            metadataObject.getMetadata().setLastFetchTimestamp(metadataOut.lastFetchTime);
                        }
                        metadataObject.getMetadata().setPageRank(metadataOut.pageRank);
                    }

                    if (sortOrder == ClientQueryInfo.SortOrder.DESCENDING) {
                        resultOut.getResults().add(0,
                                new QueryResultRecord<Writable, Writable>(key, metadataObject));
                    } else {
                        resultOut.getResults().add(new QueryResultRecord<Writable, Writable>(key, metadataObject));
                    }

                }
            }
        } finally {
            dataStream.close();
        }
    }

    /**
     * Slave-side execution: fetches the raw link data for the target fingerprint from
     * the local shard and writes it to this shard's part file under the HDFS query
     * results path.
     *
     * <p>Outlinks are fetched for {@code LINKS_QUERY}; every other query type fetches
     * inlinks. The part file is always created, and stays empty when no link data was
     * found. If writing fails the error is logged, the part file is deleted and zero is
     * returned, so the return value never claims data that is not on disk.
     *
     * @param fileSystem       file system receiving the part file
     * @param conf             not used by this implementation
     * @param eventLoop        not used by this implementation
     * @param instanceIndex    slave index queried for the link data
     * @param tempFirDir       not used by this implementation
     * @param progressCallback not used by this implementation
     * @return {@code 1} when a non-empty link data buffer was written successfully,
     *         {@code 0} otherwise (this is a flag, not a count of links)
     * @throws IOException if the query does not carry exactly one relevant shard id, or
     *                     if creating, deleting or finally closing the part file fails
     */
    @Override
    protected long executeRemote(FileSystem fileSystem, Configuration conf, EventLoop eventLoop,
            SlaveDatabaseIndex instanceIndex, File tempFirDir,
            QueryProgressCallback<URLLinkDetailQueryInfo, Writable, Writable> progressCallback) throws IOException {

        // OK .. WE EXPECT A SINGLE RELEVANT SHARD ID
        if (getCommonQueryInfo().getRelevantShardIds().size() != 1) {
            throw new IOException("Invalid Shard Id Count in Remote Dispatch");
        }

        int myShardIndex = getCommonQueryInfo().getRelevantShardIds().get(0);

        // fetch the raw link data for the target fingerprint
        FlexBuffer linkDataOut = null;
        if (getQueryData().getQueryType() == URLLinkDetailQueryInfo.QueryType.LINKS_QUERY) {
            linkDataOut = instanceIndex.queryOutlinksByFP(getQueryData().getTargetURLFP(), myShardIndex,
                    getQueryData().getLinkDBOffset());
        } else {
            linkDataOut = instanceIndex.queryInlinksByFP(getQueryData().getTargetURLFP(), myShardIndex,
                    getQueryData().getLinkDBOffset());
        }

        long recordCount = 0;

        // calculate the output path and create the output stream
        Path remoteFilePath = new Path(getHDFSQueryResultsPath(), getPartNameForSlave(myShardIndex));

        FSDataOutputStream outputStream = fileSystem.create(remoteFilePath);

        try {
            if (linkDataOut != null && linkDataOut.getCount() != 0) {
                outputStream.write(linkDataOut.get(), linkDataOut.getOffset(), linkDataOut.getCount());
                outputStream.flush();
                // only report data once it has actually been written
                recordCount = 1;
            }
        } catch (IOException e) {
            // in case of error delete output file
            LOG.error(CCStringUtils.stringifyException(e));
            try {
                outputStream.close();
            } catch (IOException closeFailure) {
                // a failing close must not prevent the partial file from being removed
                LOG.error(CCStringUtils.stringifyException(closeFailure));
            }
            outputStream = null;
            fileSystem.delete(remoteFilePath);
        } finally {
            if (outputStream != null) {
                outputStream.close();
            }
        }
        return recordCount;
    }

    /**
     * Decides whether the query has to be dispatched to a slave, and if so which one.
     *
     * <p>No dispatch is needed when the part file for the target shard already exists
     * under the HDFS query results path. Otherwise the shard's host mapping is looked up
     * in the outlink index (for {@code LINKS_QUERY}) or the inlink index (all other
     * query types) and appended to {@code shardIdToHostNameMapping}.
     *
     * @param fileSystem               file system probed for the existing part file
     * @param conf                     not used by this implementation
     * @param shardMapper              supplies the shard-to-host tuples per index
     * @param theClientRequest         not used by this implementation
     * @param shardIdToHostNameMapping output list receiving the tuple of the target shard
     * @return {@code false} when the part file already exists, {@code true} otherwise
     * @throws IOException if the query carries no shard id, or no mapping exists for the
     *                     target shard in the index that was consulted
     */
    @Override
    public boolean requiresRemoteDispatch(FileSystem fileSystem, Configuration conf, ShardMapper shardMapper,
            QueryRequest<URLLinkDetailQueryInfo, Writable, Writable> theClientRequest,
            ArrayList<ShardIndexHostNameTuple> shardIdToHostNameMapping) throws IOException {

        if (!getQueryData().isFieldDirty(URLLinkDetailQueryInfo.Field_LINKDBFILENO)) {
            throw new IOException("No Shard Id Specified in Query!");
        }
        int targetShardId = getQueryData().getLinkDBFileNo();

        // ok, otherwise calculate remote output path based on shard id
        Path remoteFilePath = new Path(getHDFSQueryResultsPath(), getPartNameForSlave(targetShardId));
        // a previous run already produced the slave output
        if (fileSystem.exists(remoteFilePath)) {
            return false;
        }

        // otherwise, yes we need to initiate a remote query ...
        // remember which index is consulted so the error below can name the right one
        boolean outlinkQuery = getQueryData().getQueryType() == URLLinkDetailQueryInfo.QueryType.LINKS_QUERY;

        ArrayList<ShardIndexHostNameTuple> tuples = null;
        if (outlinkQuery) {
            tuples = shardMapper.mapShardIdsForIndex(DatabaseIndexV2.MasterDatabaseIndex.INDEX_NAME_OUTLINK_DATA);
        } else {
            tuples = shardMapper.mapShardIdsForIndex(DatabaseIndexV2.MasterDatabaseIndex.INDEX_NAME_INLINK_DATA);
        }

        // locate our specific shard tuple (a missing tuple list counts as "no mapping")
        ShardIndexHostNameTuple tupleOut = null;
        if (tuples != null) {
            for (ShardIndexHostNameTuple tuple : tuples) {
                if (tuple.getShardId() == targetShardId) {
                    tupleOut = tuple;
                    break;
                }
            }
        }

        // we need that mapping
        if (tupleOut == null) {
            throw new IOException("Could Not Find Mapping for Shard:" + targetShardId + " in Index:"
                    + (outlinkQuery ? DatabaseIndexV2.MasterDatabaseIndex.INDEX_NAME_OUTLINK_DATA
                            : DatabaseIndexV2.MasterDatabaseIndex.INDEX_NAME_INLINK_DATA));
        }

        // add the mapping to output tuple list
        shardIdToHostNameMapping.add(tupleOut);
        // yes.... we require remote dispatch
        return true;
    }

    /**
     * Size in bytes of one fingerprint record in the flat DATA file: the records are
     * written as two longs (domain hash, then url hash).
     */
    static final int FP_RECORD_SIZE = 16;

    /**
     * Reads one page of fingerprint records from a flat DATA stream and resolves each
     * fingerprint through the master index.
     *
     * <p>The total record count is {@code length / FP_RECORD_SIZE}. For ascending order
     * the page covers records {@code [pageNumber * pageSize, +pageSize)}; otherwise it
     * is counted back from the end of the file. When {@code sortOrder} is
     * {@code DESCENDING} the records are inserted at the front of the result list.
     * Page arithmetic is done in {@code long} so large page numbers cannot overflow.
     *
     * @param masterIndex index used to look up url and metadata per fingerprint
     * @param inputStream seekable stream over the flat DATA file; not closed here
     * @param length      length of the DATA file in bytes
     * @param sortOrder   one of the {@code ClientQueryInfo.SortOrder} constants
     * @param pageNumber  zero-based page to read
     * @param pageSize    number of records per page
     * @param resultOut   result object; cleared and then filled with the page
     * @throws IOException on read failure
     */
    private static void readPaginatedResults(final DatabaseIndexV2.MasterDatabaseIndex masterIndex,
            FSDataInputStream inputStream, long length, int sortOrder, int pageNumber, int pageSize,
            QueryResult<Writable, Writable> resultOut) throws IOException {

        long startPos = 0;
        long endPos = 0;

        // calculate total record count ...
        int totalRecordCount = (int) (length / FP_RECORD_SIZE);

        resultOut.getResults().clear();
        resultOut.setPageNumber(pageNumber);
        resultOut.setTotalRecordCount(totalRecordCount);

        if (sortOrder == ClientQueryInfo.SortOrder.ASCENDING) {
            startPos = (long) pageNumber * pageSize;
            endPos = Math.min(startPos + pageSize, totalRecordCount);
        } else {
            // page counted back from the end; clamp the start for a partial last page
            startPos = totalRecordCount - ((long) pageNumber + 1) * pageSize;
            endPos = startPos + pageSize;
            startPos = Math.max(0, startPos);
        }

        if (startPos < totalRecordCount) {

            // absolute positioning on the first record of the page
            inputStream.seek(startPos * FP_RECORD_SIZE);

            for (long i = startPos; i < endPos; ++i) {

                URLFPV2 key = new URLFPV2();

                key.setDomainHash(inputStream.readLong());
                key.setUrlHash(inputStream.readLong());

                // ok time to find this item in the master index ...
                CrawlDatumAndMetadata metadataObject = new CrawlDatumAndMetadata();
                MetadataOut metadataOut = masterIndex.queryMetadataAndURLGivenFP(key);

                if (metadataOut == null) {
                    LOG.error("Failed to Retrieve URL and Metadata for Domain:" + key.getDomainHash() + " FP:"
                            + key.getUrlHash());
                    // placeholder url so the record still shows up in the page
                    metadataObject.setUrl("NULL-DH(" + key.getDomainHash() + ")-FP(" + key.getUrlHash() + ")");
                } else {
                    metadataObject.setUrl(metadataOut.url.toString());
                    metadataObject.setStatus(metadataOut.fetchStatus);
                    if (metadataOut.lastFetchTime > 0) {
                        metadataObject.getMetadata().setLastFetchTimestamp(metadataOut.lastFetchTime);
                    }
                    metadataObject.getMetadata().setPageRank(metadataOut.pageRank);
                }

                if (sortOrder == ClientQueryInfo.SortOrder.DESCENDING) {
                    resultOut.getResults().add(0, new QueryResultRecord<Writable, Writable>(key, metadataObject));
                } else {
                    resultOut.getResults().add(new QueryResultRecord<Writable, Writable>(key, metadataObject));
                }
            }
        }
    }

    /**
     * Serves one page of results from the local cache files and hands it to the
     * completion callback.
     *
     * <p>Link and inverse queries page through the flat {@code DATA} file; the
     * inverse-by-domain query pages through {@code DATA.index}; the
     * inverse-by-domain-detail query pages through the part of {@code DATA} described
     * by the url count and data offset carried in the query data. For any other query
     * type the callback receives an empty result.
     *
     * @param fileSyste        not used by this implementation
     * @param conf             configuration used to obtain the local file system
     * @param eventLoop        not used by this implementation
     * @param masterIndex      index used to resolve fingerprints to urls and metadata
     * @param theClientRequest request supplying sort order, pagination offset and page size
     * @param callback         receives the request and the populated result
     * @throws IOException on any failure reading the cache files
     */
    @Override
    public void getCachedResults(FileSystem fileSyste, Configuration conf, EventLoop eventLoop,
            final DatabaseIndexV2.MasterDatabaseIndex masterIndex,
            QueryRequest<URLLinkDetailQueryInfo, Writable, Writable> theClientRequest,
            QueryCompletionCallback<URLLinkDetailQueryInfo, Writable, Writable> callback) throws IOException {
        LOG.info("getCachedResults for Query:" + getQueryId() + " Retrieving Cached Results");

        final FileSystem localFS = FileSystem.getLocal(conf);

        final Path dataPath = new Path(getLocalQueryResultsPathPrefix(theClientRequest) + "DATA");
        final Path dataIndexPath = new Path(getLocalQueryResultsPathPrefix(theClientRequest) + "DATA.index");

        final QueryResult<Writable, Writable> page = new QueryResult<Writable, Writable>();
        final URLLinkDetailQueryInfo queryData = getQueryData();

        if (queryData.getQueryType() == URLLinkDetailQueryInfo.QueryType.LINKS_QUERY
                || queryData.getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_QUERY) {
            // flat fingerprint list
            final FSDataInputStream cachedData = localFS.open(dataPath);
            try {
                final long cachedLength = localFS.getLength(dataPath);
                final int sortOrder = theClientRequest.getClientQueryInfo().getSortOrder();
                final int pageNumber = theClientRequest.getClientQueryInfo().getPaginationOffset();
                final int pageSize = theClientRequest.getClientQueryInfo().getPageSize();
                readPaginatedResults(masterIndex, cachedData, cachedLength, sortOrder, pageNumber, pageSize, page);
            } finally {
                cachedData.close();
            }
        } else if (queryData.getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_BY_DOMAIN_QUERY) {
            // list of inlinking domains, served from the domain index
            final int sortOrder = theClientRequest.getClientQueryInfo().getSortOrder();
            final int pageNumber = theClientRequest.getClientQueryInfo().getPaginationOffset();
            final int pageSize = theClientRequest.getClientQueryInfo().getPageSize();
            readPaginatedInlinkingDomainInfo(masterIndex, localFS, dataIndexPath, dataPath, sortOrder, pageNumber,
                    pageSize, page);
        } else if (queryData.getQueryType() == URLLinkDetailQueryInfo.QueryType.INVERSE_BY_DOMAIN_DETAIL_QUERY) {
            // urls of a single inlinking domain; its extent is carried in the query data
            final InlinkingDomainInfo domainExtent = new InlinkingDomainInfo();
            domainExtent.setUrlCount(queryData.getInlinkDomainURLCount());
            domainExtent.setUrlDataPos(queryData.getUrlDataOffset());

            final int sortOrder = theClientRequest.getClientQueryInfo().getSortOrder();
            final int pageNumber = theClientRequest.getClientQueryInfo().getPaginationOffset();
            final int pageSize = theClientRequest.getClientQueryInfo().getPageSize();
            readPaginatedInlinkingDomainDetail(masterIndex, localFS, dataPath, domainExtent, sortOrder, pageNumber,
                    pageSize, page);
        }

        callback.queryComplete(theClientRequest, page);
    }
}