org.commoncrawl.service.queryserver.master.MasterServer.java Source code

Introduction

Here is the source code for org.commoncrawl.service.queryserver.master.MasterServer.java. MasterServer is the query-master daemon of the CommonCrawl query service: it extends CommonCrawlServer, maintains connections to the query slaves, and queues, dispatches, and caches client queries.

Source

/**
 * Copyright 2008 - CommonCrawl Foundation
 * 
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 **/

package org.commoncrawl.service.queryserver.master;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.zip.CRC32;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.commoncrawl.async.ConcurrentTask;
import org.commoncrawl.async.Timer;
import org.commoncrawl.async.ConcurrentTask.CompletionCallback;
import org.commoncrawl.crawl.common.internal.CrawlEnvironment;
import org.commoncrawl.db.RecordStore;
import org.commoncrawl.protocol.ArchiveInfo;
import org.commoncrawl.protocol.shared.ArcFileItem;
import org.commoncrawl.rpc.base.internal.AsyncClientChannel;
import org.commoncrawl.rpc.base.internal.AsyncContext;
import org.commoncrawl.rpc.base.internal.AsyncRequest;
import org.commoncrawl.rpc.base.internal.AsyncServerChannel;
import org.commoncrawl.rpc.base.shared.RPCException;
import org.commoncrawl.rpc.base.shared.RPCStruct;
import org.commoncrawl.server.CommonCrawlServer;
import org.commoncrawl.service.queryserver.BaseConfig;
import org.commoncrawl.service.queryserver.ClientQueryInfo;
import org.commoncrawl.service.queryserver.Common;
import org.commoncrawl.service.queryserver.ContentQueryRPCInfo;
import org.commoncrawl.service.queryserver.ContentQueryRPCResult;
import org.commoncrawl.service.queryserver.MasterState;
import org.commoncrawl.service.queryserver.PersistentQueryInfo;
import org.commoncrawl.service.queryserver.QueryServerMaster;
import org.commoncrawl.service.queryserver.QueryStatus;
import org.commoncrawl.service.queryserver.ShardIndexHostNameTuple;
import org.commoncrawl.service.queryserver.SlaveStatus;
import org.commoncrawl.service.queryserver.index.DatabaseIndexV2;
import org.commoncrawl.service.queryserver.query.Query;
import org.commoncrawl.service.queryserver.query.QueryCompletionCallback;
import org.commoncrawl.service.queryserver.query.QueryProgressCallback;
import org.commoncrawl.service.queryserver.query.QueryRequest;
import org.commoncrawl.service.queryserver.query.QueryResult;
import org.commoncrawl.service.queryserver.query.RemoteQueryCompletionCallback;
import org.commoncrawl.service.queryserver.query.ShardMapper;
import org.commoncrawl.util.CCStringUtils;
import org.commoncrawl.util.FileUtils;

/**
 * Master server for the CommonCrawl query service: tracks query slave state,
 * queues incoming client queries, and dispatches each one against cached
 * results, the remote slaves, or the local master database index.
 * 
 * @author rana
 *
 */
public class MasterServer extends CommonCrawlServer
        implements QueryServerMaster, ShardMapper, AsyncServerChannel.ConnectionCallback {

    /////////////////////////////////////////////////////////////////////////////////////////////////
    // keys used to persist master state and cached query info in the record store
    private static final String MasterDBStateKey = "DBState";
    private static final String CachedQueryIDPrefix = "CQID_";
    private static final String CachedQueryPrefix = "CQ_";

    private String _slavesFile;
    private File _cacheDirs[];
    private File _tempFileDir = null;
    private long _tempFileDirSeed = -1;
    private File _webAppRoot = null;
    private Vector<QueryServerSlaveState> _slaves = new Vector<QueryServerSlaveState>();
    private Map<String, QueryServerSlaveState> _slaveNameToOnlineStateMap = new TreeMap<String, QueryServerSlaveState>();
    private Map<String, SlaveStatus> _slaveStatusMap = new TreeMap<String, SlaveStatus>();
    private long _slavesFileCRC = -1;
    private String _hdfsWorkingDir = "crawl/querydb/temp";
    private String _hdfsResultsDir = "crawl/querydb/results";
    private String _hdfsResultsCacheDir = "crawl/querydb/cache";
    private long _databaseId = -1;
    private Path _localDataDir = null;
    private int _dataDriveCount = -1;
    private DatabaseIndexV2.MasterDatabaseIndex _masterIndex = null;
    private Path _queryDBPath = null;
    /** record store object used to persist state **/
    private RecordStore _recordStore = new RecordStore();
    private MasterState _masterState = null;
    @SuppressWarnings("unchecked")
    private LinkedList<QueryRequest> _queuedClientQueries = new LinkedList<QueryRequest>();

    @SuppressWarnings("unchecked")
    private Map<Long, QueryRequest> _activeRemoteOrLocalQueries = new HashMap<Long, QueryRequest>();
    @SuppressWarnings("unchecked")
    private Set<QueryRequest> _activeClientQueries = new HashSet<QueryRequest>();

    @SuppressWarnings("unused")
    private QueryServerFE _queryServerFE;
    private ExecutorService _s3DownloaderThreadPool = Executors.newCachedThreadPool();

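    /**
     * Maps a query id to one of the configured cache directories (--cacheFileDir)
     * by simple modulo partitioning.
     **/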
    private File getLocalCacheDirForQuery(long queryId) {
        int partitionId = ((int) queryId) % _cacheDirs.length;
        return _cacheDirs[partitionId];
    }

    public MasterServer() {
        //setAsyncWebDispatch(true);
    }

    public DatabaseIndexV2.MasterDatabaseIndex getDatabaseIndex() {
        return _masterIndex;
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // INTERNAL ROUTINES 
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    public static class BlockingQueryResult<KeyType, ValueType> {

        public BlockingQueryResult(QueryResult<KeyType, ValueType> resultObject) {
            querySucceeded = true;
            this.resultObject = resultObject;
        }

        public BlockingQueryResult(String failureReason) {
            querySucceeded = false;
            this.errorString = failureReason;
        }

        public boolean querySucceeded = false;
        public QueryResult<KeyType, ValueType> resultObject;
        public String errorString;
    }

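    /**
     * Synchronous wrapper around the async query machinery: marshals the request
     * onto the event loop via a zero-delay timer, then blocks on a single-slot
     * queue until the completion callback delivers a result. Returns null if the
     * calling thread is interrupted while waiting.
     **/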
    <DataType extends RPCStruct, KeyType, ValueType> BlockingQueryResult<KeyType, ValueType> blockingQueryRequest(
            final Query<DataType, KeyType, ValueType> queryObject, final ClientQueryInfo queryInfo)
            throws IOException {
        final LinkedBlockingQueue<BlockingQueryResult<KeyType, ValueType>> queue = new LinkedBlockingQueue<BlockingQueryResult<KeyType, ValueType>>(
                1);

        getEventLoop().setTimer(new Timer(0, false, new Timer.Callback() {

            @Override
            public void timerFired(Timer timer) {
                try {
                    queueClientQueryRequest(queryObject, queryInfo,
                            new QueryCompletionCallback<DataType, KeyType, ValueType>() {

                                @Override
                                public void queryComplete(QueryRequest<DataType, KeyType, ValueType> request,
                                        QueryResult<KeyType, ValueType> queryResult) {
                                    LOG.info("Recevied QueryComplete for query:"
                                            + request.getSourceQuery().getQueryId());
                                    BlockingQueryResult<KeyType, ValueType> result = new BlockingQueryResult<KeyType, ValueType>(
                                            queryResult);
                                    try {
                                        LOG.info("Queing response for Query:"
                                                + request.getSourceQuery().getQueryId());
                                        queue.put(result);
                                        LOG.info("Queued response for Query:"
                                                + request.getSourceQuery().getQueryId());
                                    } catch (InterruptedException e) {
                                        LOG.error(CCStringUtils.stringifyException(e));
                                    }
                                }

                                @Override
                                public void queryFailed(QueryRequest<DataType, KeyType, ValueType> request,
                                        String reason) {
                                    LOG.info("Received queryFailed for request:"
                                            + request.getSourceQuery().getQueryId());
                                    BlockingQueryResult<KeyType, ValueType> result = new BlockingQueryResult<KeyType, ValueType>(
                                            reason);
                                    try {
                                        queue.put(result);
                                    } catch (InterruptedException e) {
                                        LOG.error(CCStringUtils.stringifyException(e));
                                    }
                                }

                            });
                } catch (IOException e) {
                    LOG.error(CCStringUtils.stringifyException(e));
                }
            }

        }));

        try {
            return queue.take();
        } catch (InterruptedException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
        return null;
    }

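    /**
     * Entry point for client queries: resolves the query's canonical id against
     * the persistent query cache (reusing an existing query id when found,
     * otherwise assigning a new one), prepares the HDFS working directory and
     * the local cache directory, and queues the request for dispatch.
     **/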
    <DataType extends RPCStruct, KeyType, ValueType> void queueClientQueryRequest(
            Query<DataType, KeyType, ValueType> queryObject, ClientQueryInfo theClientRequest,
            QueryCompletionCallback<DataType, KeyType, ValueType> callback) throws IOException {

        // set query info
        queryObject.setClientQueryInfo(theClientRequest);

        // get the canonical id for this query 
        String queryCanonicalId = queryObject.getCanonicalId();

        LOG.info("Received Query Request with CanonicalId:" + queryCanonicalId);

        // now check cache for persistent query cache info ...
        PersistentQueryInfo persistentQueryInfo = getPersistentQueryInfo(queryCanonicalId);
        // ok, cached query found ... 
        if (persistentQueryInfo != null) {
            LOG.info("Existing Query Id found:" + persistentQueryInfo.getQueryId()
                    + " for Request with CannonicalId:" + queryCanonicalId);
            // found cached query... set id of source query object  
            queryObject.setQueryId(persistentQueryInfo.getQueryId());
        } else {
            // assign the query a new id 
            queryObject.setQueryId(getNextQueryId());

            LOG.info("Assigning Query Id:" + queryObject.getQueryId() + " for Request with CannonicalId:"
                    + queryCanonicalId);

            // and store the relationship
            persistentQueryInfo = new PersistentQueryInfo();
            persistentQueryInfo.setCannonicalQueryId(queryCanonicalId);
            persistentQueryInfo.setQueryId(queryObject.getQueryId());
            persistentQueryInfo.setCreateTime(System.currentTimeMillis());

            LOG.info("Inserting Persistent Query Record");
            // insert new structure into database ... 
            insertUpdatePersistentInfo(persistentQueryInfo, false);
        }

        // establish hdfs working directory 
        Path hdfsWorkingDir = new Path(_hdfsWorkingDir, Long.toString(queryObject.getQueryId()));

        // remove existing directory if present ... 
        CrawlEnvironment.getDefaultFileSystem().delete(hdfsWorkingDir, true);

        // create the working directory
        CrawlEnvironment.getDefaultFileSystem().mkdirs(hdfsWorkingDir);

        // establish the hdfs working directory ...
        queryObject.getCommonQueryInfo().setQueryResultPath(hdfsWorkingDir.toString());

        // establish the query cache directory 
        File localQueryDirectory = getLocalCacheDirForQuery(queryObject.getQueryId());

        LOG.info("Query Cache Directory for Query:" + queryObject.getQueryId() + " is:"
                + localQueryDirectory.getAbsolutePath());
        // make sure it exists ... 
        localQueryDirectory.mkdirs();

        // allocate the client request object ...
        QueryRequest<DataType, KeyType, ValueType> clientQueryObj = new QueryRequest<DataType, KeyType, ValueType>(
                queryObject, theClientRequest, localQueryDirectory, callback);

        // setup context ... 
        queryObject.setContext(clientQueryObj);

        LOG.info("Query Client Request");

        // add it to queue ... 
        _queuedClientQueries.addLast(clientQueryObj);

        //potentially start the next query ... 
        potentiallyStartNextQuery();
    }

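    // Drains the client query queue, keeping at most MAX_CONCURRENT_QUERIES
    // active. Each request is dispatched via the cheapest available path:
    // cached results first, then remote dispatch to the slaves, and finally
    // a local query against the master index.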
    @SuppressWarnings("unchecked")
    private void potentiallyStartNextQuery() throws IOException {

        FileSystem fileSystem = CrawlEnvironment.getDefaultFileSystem();

        LinkedList<QueryRequest> requeueList = new LinkedList<QueryRequest>();

        while (_queuedClientQueries.size() != 0 && _activeClientQueries.size() < Common.MAX_CONCURRENT_QUERIES) {

            QueryRequest request = _queuedClientQueries.removeFirst();

            LOG.info("Processing Query:" + request.getSourceQuery().getQueryId() + " ActiveCount:"
                    + _activeClientQueries.size());

            try {
                // first see if a remote (or local) query is active ... 
                if (_activeRemoteOrLocalQueries.get(request.getSourceQuery().getQueryId()) != null) {
                    LOG.info("Cannot Dispatch ClientRequest:" + request.getClientQueryInfo().getClientQueryId()
                            + " because existing query in progress");
                    // Fail the query immediately for now ..
                    request.getCompletionCallback().queryFailed(request,
                            "A similar query is already running and may take some time to complete. Please try again later.");
                } else {
                    ArrayList<ShardIndexHostNameTuple> shardIdMapping = new ArrayList<ShardIndexHostNameTuple>();
                    // first check to see if cached results are available ... 
                    if (request.getSourceQuery().cachedResultsAvailable(fileSystem, _configuration, request)) {
                        // add to active ... 
                        _activeClientQueries.add(request);
                        LOG.info("Running Cache Query for Query:" + request.getSourceQuery().getQueryId());
                        runCacheQuery(request);
                    }
                    // check to see if remote dispatch is required ..
                    else if (request.getSourceQuery().requiresRemoteDispatch(fileSystem, _configuration, this,
                            request, shardIdMapping)) {
                        // ok, we need at least one shard to run on ... 
                        if (shardIdMapping.size() == 0) {
                            LOG.error("Query:" + request.getSourceQuery().getQueryId()
                                    + " FAILED WITH EMPTY HOSTS(TO RUN ON) LIST");
                            throw new IOException("Empty Host List prior to remoteDispatch!");
                        }
                        // set shard id to host mapping into query 
                        request.getSourceQuery().setShardIdToHostMapping(shardIdMapping);
                        // ok, we're ready for remote dispatch ... 
                        // add to active ... 
                        _activeClientQueries.add(request);
                        // add to remote dispatch id set 
                        _activeRemoteOrLocalQueries.put(request.getSourceQuery().getQueryId(), request);
                        LOG.info("Running Remote Query for Query:" + request.getSourceQuery().getQueryId());
                        // and dispatch request .. 
                        runRemoteQuery(fileSystem, request);
                    }
                    // otherwise .. run Local Request
                    else {
                        // add to active ... 
                        _activeClientQueries.add(request);
                        // add to remote dispatch id set 
                        _activeRemoteOrLocalQueries.put(request.getSourceQuery().getQueryId(), request);
                        LOG.info("Running Local Query for Query:" + request.getSourceQuery().getQueryId());
                        // and dispatch request .. 
                        runLocalQuery(request);
                    }
                }
            } catch (IOException e) {
                LOG.error("Client Request:" + request.getClientQueryInfo().getClientQueryId()
                        + " Failed with Exception:" + CCStringUtils.stringifyException(e));
                request.getCompletionCallback().queryFailed(request, CCStringUtils.stringifyException(e));
            }
        }
        _queuedClientQueries.addAll(requeueList);
    }

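    // Releases a request's resources: deletes its temp directory, removes it
    // from the active sets, resets its run state to IDLE, and kicks the
    // scheduler so a queued query can take the freed slot.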
    @SuppressWarnings("unchecked")
    private void deactivateRequest(QueryRequest request) {
        LOG.info("DeActivating Query:" + request.getSourceQuery().getQueryId());

        // first things first, delete temp file!!!
        File queryTempFile = getTempDirForQuery(request.getSourceQuery().getQueryId());
        LOG.info("** Deleting Temp File for Query:" + request.getSourceQuery().getQueryId() + " At:"
                + queryTempFile.getAbsolutePath());
        FileUtils.recursivelyDeleteFile(queryTempFile);

        _activeClientQueries.remove(request);
        if (request.getRunState() == QueryRequest.RunState.RUNNING_REMOTE
                || request.getRunState() == QueryRequest.RunState.RUNNING_LOCAL) {
            _activeRemoteOrLocalQueries.remove(request.getSourceQuery().getQueryId());
        }
        request.setRunState(QueryRequest.RunState.IDLE);

        try {
            potentiallyStartNextQuery();
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }

    }

    @SuppressWarnings("unchecked")
    private void requeueRequest(QueryRequest request) {
        deactivateRequest(request);
        LOG.info("ReQueueing Query:" + request.getSourceQuery().getQueryId());
        _queuedClientQueries.addFirst(request);
        try {
            potentiallyStartNextQuery();
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
    }

    @SuppressWarnings("unchecked")
    private void runRemoteQuery(final FileSystem remoteFileSystem, final QueryRequest request) {

        //LOG.info("runRemoteQuery Called for Query:" + request.getSourceQuery().getQueryId());
        if (!request.setRunState(QueryRequest.RunState.RUNNING_REMOTE)) {
            deactivateRequest((QueryRequest) request.getSourceQuery().getContextObject());
            request.getCompletionCallback().queryFailed(request, "Unable to transition to RUNNING_REMOTE");
            return;
        }

        try {
            request.getSourceQuery().startRemoteQuery(_slaveNameToOnlineStateMap,
                    request.getSourceQuery().getShardIdToHostMapping(), new QueryProgressCallback() {

                        @Override
                        public boolean updateProgress(Query theQueryObject, float percentComplete) {
                            LOG.info("Got updateProgress callback for:" + theQueryObject.getQueryId());
                            return true;
                        }

                    },

                    new RemoteQueryCompletionCallback() {

                        @Override
                        public void queryComplete(Query query, long resultCount) {
                            LOG.info("Recevied QueryComplete for Query:" + request.getSourceQuery().getQueryId());

                            // call remote dispatch complete
                            try {
                                query.remoteDispatchComplete(remoteFileSystem, _configuration, request,
                                        resultCount);

                                if (resultCount > 0) {
                                    LOG.info("Remote Query:" + request.getSourceQuery().getQueryId() + " returned:"
                                            + resultCount + " results");
                                    // deactivate the request first, then requeue it ...
                                    requeueRequest((QueryRequest) query.getContextObject());
                                } else {

                                    LOG.info("Query:" + request.getSourceQuery().getQueryId()
                                            + " returned zero results");

                                    deactivateRequest(request);

                                    QueryResult result = new QueryResult();
                                    result.setTotalRecordCount(0);
                                    request.getCompletionCallback().queryComplete(request, result);

                                    LOG.info("Query:" + request.getSourceQuery().getQueryId() + " DONE DUDE");
                                }
                            } catch (IOException e) {
                                String error = "Query: " + request.getSourceQuery().getQueryId()
                                        + " Failed with Exception:" + CCStringUtils.stringifyException(e);
                                LOG.error(error);
                                // deactivate the request 
                                deactivateRequest((QueryRequest) query.getContextObject());

                                request.getCompletionCallback().queryFailed(request, error);
                            }
                        }

                        @Override
                        public void queryFailed(Query query, final String reason) {
                            LOG.info("Recevied QueryFailed for Query:" + request.getSourceQuery().getQueryId()
                                    + " Reason:" + reason);
                            // inform query of failure 
                            query.remoteDispatchFailed(remoteFileSystem);
                            // deactivate the request 
                            deactivateRequest((QueryRequest) query.getContextObject());

                            request.getCompletionCallback().queryFailed(request, reason);
                        }
                    });
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            deactivateRequest(request);
            request.getCompletionCallback().queryFailed(request, CCStringUtils.stringifyException(e));
        }
    }

    @SuppressWarnings("unchecked")
    private void runLocalQuery(final QueryRequest request) {
        //LOG.info("runLocalQuery Called for Query:" + request.getSourceQuery().getQueryId());

        if (!request.setRunState(QueryRequest.RunState.RUNNING_LOCAL)) {
            deactivateRequest((QueryRequest) request.getSourceQuery().getContextObject());
            request.getCompletionCallback().queryFailed(request, "Unable to transition to RUNNING_LOCAL");
            return;
        }

        try {
            request.getSourceQuery().startLocalQuery(CrawlEnvironment.getDefaultFileSystem(), _configuration,
                    _masterIndex, getTempDirForQuery(request.getSourceQuery().getQueryId()), getEventLoop(),
                    request, new RemoteQueryCompletionCallback() {

                        @Override
                        public void queryComplete(Query query, long resultCount) {
                            LOG.info("Recevied QueryComplete for Query:" + request.getSourceQuery().getQueryId());
                            if (resultCount > 0) {
                                LOG.info("Local Query:" + request.getSourceQuery().getQueryId() + " returned:"
                                        + resultCount + " results");
                                // requeue request ...
                                requeueRequest((QueryRequest) query.getContextObject());
                            } else {
                                LOG.info("Query:" + request.getSourceQuery().getQueryId()
                                        + " returned zero results");

                                // deactivate ... 
                                deactivateRequest((QueryRequest) query.getContextObject());

                                // initiate callback 

                                QueryResult result = new QueryResult();
                                result.setTotalRecordCount(0);
                                request.getCompletionCallback().queryComplete(request, result);
                                LOG.info("Query:" + request.getSourceQuery().getQueryId() + " DONE DUDE");
                            }
                        }

                        @Override
                        public void queryFailed(Query query, final String reason) {
                            LOG.info("Recevied QueryFailed for Query:" + request.getSourceQuery().getQueryId()
                                    + " Reason:" + reason);
                            deactivateRequest((QueryRequest) query.getContextObject());
                            request.getCompletionCallback().queryFailed(request, reason);
                        }
                    });
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            deactivateRequest(request);
            request.getCompletionCallback().queryFailed(request, CCStringUtils.stringifyException(e));
        }
    }

    @SuppressWarnings("unchecked")
    private void runCacheQuery(QueryRequest request) {
        //LOG.info("runCacheQuery Called for Query:" + request.getSourceQuery().getQueryId());
        if (!request.setRunState(QueryRequest.RunState.RUNNING_CACHE)) {
            deactivateRequest((QueryRequest) request.getSourceQuery().getContextObject());
            request.getCompletionCallback().queryFailed(request, "Unable to transition to RUNNING_CACHE");
            return;
        }

        try {
            request.getSourceQuery().startCacheQuery(_masterIndex, CrawlEnvironment.getDefaultFileSystem(),
                    _configuration, getEventLoop(), request, new QueryCompletionCallback() {

                        @Override
                        public void queryComplete(QueryRequest request, QueryResult queryResult) {
                            deactivateRequest(request);
                            request.getCompletionCallback().queryComplete(request, queryResult);
                        }

                        @Override
                        public void queryFailed(QueryRequest request, String reason) {
                            deactivateRequest(request);
                            request.getCompletionCallback().queryFailed(request, reason);
                        }
                    });
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            deactivateRequest(request);
            request.getCompletionCallback().queryFailed(request, CCStringUtils.stringifyException(e));
        }
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // COMMONCRAWL SERVER OVERRIDES
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 

    @Override
    protected String getDefaultDataDir() {
        return CrawlEnvironment.DEFAULT_DATA_DIR;
    }

    @Override
    protected String getDefaultHttpInterface() {
        return CrawlEnvironment.DEFAULT_HTTP_INTERFACE;
    }

    @Override
    protected int getDefaultHttpPort() {
        return CrawlEnvironment.DEFAULT_QUERY_MASTER_HTTP_PORT;
    }

    @Override
    protected String getDefaultLogFileName() {
        return "qmaster";
    }

    @Override
    protected String getDefaultRPCInterface() {
        return CrawlEnvironment.DEFAULT_RPC_INTERFACE;
    }

    @Override
    protected int getDefaultRPCPort() {
        return CrawlEnvironment.DEFAULT_QUERY_MASTER_RPC_PORT;
    }

    @Override
    protected String getWebAppName() {
        return CrawlEnvironment.QUERY_MASTER_WEBAPP_NAME;
    }

    /**
     * Get the pathname to the <code>webapps</code> files.
     * @param path Path to find.
     * @return the pathname as a URL
     */
    private static String getWebAppsPath(final String path) throws IOException {
        URL url = MasterServer.class.getClassLoader().getResource(path);
        if (url == null)
            throw new IOException("webapps not found in CLASSPATH");
        return url.toString();
    }

    private File getDefaultWebAppPath() throws IOException {
        return new File(getWebAppsPath("webapps") + File.separator + getWebAppName());
    }

    @Override
    protected boolean initServer() {

        _tempFileDirSeed = System.currentTimeMillis();

        if (_slavesFile == null) {
            LOG.error("Slaves File not specified. Specify Slaves file via --slaves");
            return false;
        } else {
            try {
                // get a pointer to the hdfs file system 
                // _fileSystem = CrawlEnvironment.getDefaultFileSystem();
                // parse slaves file ..
                parseSlavesFile();
            } catch (IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
                return false;
            }
        }

        // initialize database ... 
        File databasePath = new File(getDataDirectory().getAbsolutePath() + "/" + CrawlEnvironment.QMASTER_DB);
        LOG.info("Config says QMaster db path is: " + databasePath);
        // initialize master index 
        if (_databaseId == -1 || _localDataDir == null || _dataDriveCount == -1 || _cacheDirs == null) {
            if (_databaseId == -1)
                LOG.error("Database Id is Not Defined");
            if (_localDataDir == null)
                LOG.error("Local DataDir is NULL");
            if (_dataDriveCount == -1)
                LOG.error("Data Drive Count is not Defined");
            if (_cacheDirs == null)
                LOG.error("CacheDirs is NULL");
            return false;
        }

        try {
            FileSystem remoteFS = CrawlEnvironment.getDefaultFileSystem();
            // fully resolve slave names 
            HashSet<String> onlineSlaves = new HashSet<String>();
            for (String slave : _slaveNameToOnlineStateMap.keySet()) {
                String fullyQualifiedName = InetAddress.getByName(slave).getCanonicalHostName();
                LOG.info("Slave:" + slave + " maps to FQN:" + fullyQualifiedName);
                onlineSlaves.add(fullyQualifiedName);
            }

            // load master index .. 
            _masterIndex = new DatabaseIndexV2.MasterDatabaseIndex(_configuration, remoteFS, _dataDriveCount,
                    _databaseId, onlineSlaves);

            // initialize record store
            _recordStore.initialize(databasePath, null);

            // load db state ... 
            _masterState = (MasterState) _recordStore.getRecordByKey(MasterDBStateKey);

            if (_masterState == null) {
                _masterState = new MasterState();
                _masterState.setLastQueryId(0);
                _recordStore.beginTransaction();
                _recordStore.insertRecord("", MasterDBStateKey, _masterState);
                _recordStore.commitTransaction();
            }

            // create server channel ... 
            AsyncServerChannel channel = new AsyncServerChannel(this, this.getEventLoop(), this.getServerAddress(),
                    this);

            // register RPC services it supports ... 
            registerService(channel, QueryServerMaster.spec);
        } catch (IOException e) {
            LOG.error("Database Initialization Failed with Exception:" + CCStringUtils.stringifyException(e));
            return false;
        }

        if (_tempFileDir == null) {
            _tempFileDir = new File(getDataDirectory(), "qserver_temp");
            _tempFileDir.mkdirs();

            LOG.info("TempFilr Dir is null. Setting TempFile Dir to:" + _tempFileDir.getAbsolutePath());
        }

        if (_webAppRoot == null) {
            try {
                _webAppRoot = getDefaultWebAppPath();
            } catch (IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
                return false;
            }
            LOG.info("WebApp Root not specified.Using default at:" + _webAppRoot.getAbsolutePath());
        }

        try {
            // load database state ... 
            // loadState();
            // connect to slaves ...
            connectToSlaves();
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            return false;
        }

        // clear working directory... 
        try {
            LOG.info("Clearing working directory:" + _hdfsWorkingDir);
            // FileSystem.delete does not expand globs, so enumerate the working
            // directory's children via globStatus and delete each one
            FileSystem workingFS = CrawlEnvironment.getDefaultFileSystem();
            FileStatus workingDirEntries[] = workingFS.globStatus(new Path(_hdfsWorkingDir, "*"));
            if (workingDirEntries != null) {
                for (FileStatus entry : workingDirEntries) {
                    workingFS.delete(entry.getPath(), true);
                }
            }
            LOG.info("Cleared working directory:" + _hdfsWorkingDir);
        } catch (IOException e1) {
            LOG.error(CCStringUtils.stringifyException(e1));
        }

        try {
            // locate query db path 
            _queryDBPath = locateQueryDBPath();

        } catch (IOException e) {
            LOG.error("Failed to locate QueryDB Path with Exception:" + CCStringUtils.stringifyException(e));
            return false;
        }

        if (_queryDBPath == null) {
            LOG.error("Failed to find queryDB candidate.");
            return false;
        }

        try {
            _queryServerFE = new QueryServerFE(this, _webAppRoot);
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            return false;
        }

        return true;
    }

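    // The per-run seed (set to the server start time in initServer) keeps temp
    // directories from different server runs from colliding.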
    private File getTempDirForQuery(long queryId) {
        return new File(_tempFileDir, Long.toString(queryId) + "-" + _tempFileDirSeed);
    }

    private void writeMasterState() throws IOException {
        _recordStore.beginTransaction();
        _recordStore.updateRecordByKey(MasterDBStateKey, _masterState);
        _recordStore.commitTransaction();
    }

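    /** Returns the next (monotonically increasing) query id, persisting the counter via the record store. **/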
    private long getNextQueryId() throws IOException {
        long nextQueryId = _masterState.getLastQueryId() + 1;
        _masterState.setLastQueryId(nextQueryId);
        writeMasterState();
        return nextQueryId;
    }

    private void insertUpdatePersistentInfo(PersistentQueryInfo persistentQueryInfo, boolean isUpdate)
            throws IOException {

        persistentQueryInfo.setLastAccessTime(System.currentTimeMillis());

        _recordStore.beginTransaction();
        if (isUpdate) {
            _recordStore.updateRecordByKey(CachedQueryPrefix + persistentQueryInfo.getCannonicalQueryId(),
                    persistentQueryInfo);
        } else {
            _recordStore.insertRecord(CachedQueryIDPrefix + persistentQueryInfo.getQueryId(),
                    CachedQueryPrefix + persistentQueryInfo.getCannonicalQueryId(), persistentQueryInfo);
        }
        _recordStore.commitTransaction();

    }

    private PersistentQueryInfo getPersistentQueryInfo(String canonicalId) throws IOException {
        return (PersistentQueryInfo) _recordStore.getRecordByKey(CachedQueryPrefix + canonicalId);
    }

    @Override
    protected boolean parseArguements(String[] argv) {

        for (int i = 0; i < argv.length; ++i) {
            if (argv[i].equalsIgnoreCase("--slaves")) {
                _slavesFile = argv[++i];
            } else if (argv[i].equalsIgnoreCase("--databaseId")) {
                _databaseId = Long.parseLong(argv[++i]);
            } else if (argv[i].equalsIgnoreCase("--localDataDir")) {
                _localDataDir = new Path(argv[++i]);
            } else if (argv[i].equalsIgnoreCase("--dataDriveCount")) {
                _dataDriveCount = Integer.parseInt(argv[++i]);
            } else if (argv[i].equalsIgnoreCase("--cacheFileDir")) {
                String paths = argv[++i];
                String splitPaths[] = paths.split(",");
                _cacheDirs = new File[splitPaths.length];
                int index = 0;
                for (String path : splitPaths) {
                    _cacheDirs[index] = new File(path);
                    _cacheDirs[index].mkdirs();
                    if (!_cacheDirs[index].isDirectory()) {
                        LOG.error("Invalid Cache Directory Specified:" + _cacheDirs[index].getAbsolutePath());
                        return false;
                    }
                    index++;
                }
            } else if (argv[i].equalsIgnoreCase("--tempFileDir")) {
                _tempFileDir = new File(argv[++i]);
                // delete the directory contents up front 
                FileUtils.recursivelyDeleteFile(_tempFileDir);
                // and recreate
                _tempFileDir.mkdirs();
                if (!_tempFileDir.isDirectory()) {
                    LOG.error("Invalid Temp Directory Specified:" + _tempFileDir.getAbsolutePath());
                    return false;
                }
            } else if (argv[i].equalsIgnoreCase("--webAppRoot")) {
                _webAppRoot = new File(argv[++i]);
                if (!_webAppRoot.isDirectory()) {
                    LOG.error("Invalid Web App Directory Specified:" + _webAppRoot.getAbsolutePath());
                    return false;
                }
            }

        }
        return true;
    }

    @Override
    protected void printUsage() {
        System.out.println("Required Parameters: --domainFile domainFilePath --cacheFileDir cacheFileDirectory");
    }

    @Override
    protected boolean startDaemons() {
        return true;
    }

    @Override
    protected void stopDaemons() {
    }

    private Path locateQueryDBPath() throws IOException {
        FileSystem fs = CrawlEnvironment.getDefaultFileSystem();

        FileStatus statusArray[] = fs.globStatus(new Path("crawl/querydb/db/*"));

        Path candidatePath = null;
        for (FileStatus fileStatus : statusArray) {
            if (candidatePath == null) {
                candidatePath = fileStatus.getPath();
            } else {
                long prevTimestamp = Long.parseLong(candidatePath.getName());
                long currentTimestamp = Long.parseLong(fileStatus.getPath().getName());
                if (currentTimestamp > prevTimestamp) {
                    candidatePath = fileStatus.getPath();
                }
            }
        }
        if (candidatePath != null) {
            LOG.info("Selected Candidate Path:" + candidatePath);
        }
        return candidatePath;
    }

    public BaseConfig getBaseConfigForSlave(QueryServerSlaveState slave) {

        BaseConfig baseConfig = new BaseConfig();

        baseConfig.setBaseWorkingDir(_hdfsWorkingDir);
        baseConfig.setQueryResultsDir(_hdfsResultsDir);
        baseConfig.setQueryCacheDir(_hdfsResultsCacheDir);
        baseConfig.setQueryDBPath(_queryDBPath.toString());
        // baseConfig.setFileSystem(_fileSystem.getUri().toString());
        baseConfig.setDatabaseTimestamp(_databaseId);

        return baseConfig;
    }

    void connectToSlaves() throws IOException {
        LOG.info("Connecting to Slaves");
        for (QueryServerSlaveState slave : _slaves) {
            slave.connect();
        }
    }

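    // Parses the slaves file: each non-comment line must contain three
    // ':'-separated fields, of which only the first (the host name) is
    // currently used. The file's CRC is also recorded in _slavesFileCRC.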
    void parseSlavesFile() throws IOException {

        LOG.info("Loading Slaves File from:" + _slavesFile);
        InputStream stream = null;
        URL resourceURL = CrawlEnvironment.getHadoopConfig().getResource(_slavesFile);

        if (resourceURL != null) {
            stream = resourceURL.openStream();
        }
        // try as filename 
        else {
            LOG.info("Could not load resource as an URL. Trying as an absolute pathname");
            stream = new FileInputStream(new File(_slavesFile));
        }

        if (stream == null) {
            throw new FileNotFoundException();
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(stream)));

        String slaveHostPlusCount = null;

        LOG.info("Loading slaves file");
        while ((slaveHostPlusCount = reader.readLine()) != null) {
            if (!slaveHostPlusCount.startsWith("#")) {
                StringTokenizer tokenizer = new StringTokenizer(slaveHostPlusCount, ":");
                if (tokenizer.countTokens() != 3) {
                    throw new IOException("Invalid Slave Entry:" + slaveHostPlusCount + " in slaves File");
                } else {
                    String slaveName = tokenizer.nextToken();
                    //TODO:INSTANCE COUNT IS IGNORED !!!
                    QueryServerSlaveState state = new QueryServerSlaveState(this, slaveName);
                    LOG.info("Adding slave:" + slaveName);
                    _slaves.add(state);
                    // map host name to onlinestate 
                    _slaveNameToOnlineStateMap.put(slaveName, state);
                    // and add SlaveState entry 
                    _slaveStatusMap.put(slaveName, new SlaveStatus());
                }
            }
        }

        // now close the file and reopen it to compute the crc ... 
        reader.close();
        stream.close();

        CRC32 fileCRC = new CRC32();

        InputStream crcStream = null;

        if (resourceURL != null) {
            crcStream = resourceURL.openStream();
        } else {
            LOG.info("Could not load resource as an URL. Trying as an absolute pathname");
            crcStream = new FileInputStream(new File(_slavesFile));
        }

        byte[] buf = new byte[4096];
        int nRead = 0;
        while ((nRead = crcStream.read(buf, 0, buf.length)) > 0) {
            fileCRC.update(buf, 0, nRead);
        }

        _slavesFileCRC = fileCRC.getValue();
        LOG.info("Slaves File CRC is:" + _slavesFileCRC);

        crcStream.close();

    }

    @SuppressWarnings("unchecked")
    void slaveStatusChanged(QueryServerSlaveState slave, SlaveStatus slaveStatus) {
        // LOG.info("Received slaveStatusChanged from slave:" + slave.getFullyQualifiedName());

        if (slaveStatus != null && slaveStatus.getQueryStatus().size() != 0) {

            //LOG.info("Received:" + slaveStatus.getQueryStatus() + " QueryStatus updated from Slave:" +  slave.getFullyQualifiedName());

            // broadcast all query changes ... 
            for (QueryStatus queryStatus : slaveStatus.getQueryStatus()) {
                //LOG.info("RCVD Status for Query:" + queryStatus.getQueryId() + " Status:" + QueryStatus.Status.toString(queryStatus.getStatus()));
                QueryRequest request = _activeRemoteOrLocalQueries.get(queryStatus.getQueryId());
                if (request != null) {
                    //LOG.info("FOUND QueryRequestObj:" + request + " for Query:" + queryStatus.getQueryId());
                    try {
                        request.getSourceQuery().updateQueryStatusForSlave(slave.getHostName(), queryStatus);
                    } catch (IOException e) {
                        LOG.error("Error Updating QueryStatus for Query:" + request.getSourceQuery().getQueryId()
                                + " Slave:" + slave.getFullyQualifiedName() + " Error:"
                                + CCStringUtils.stringifyException(e));
                    }
                } else {
                    LOG.error("DID NOT FIND QueryRequestObj for Query:" + queryStatus.getQueryId());
                }
            }
            // clear query status array ...
            slaveStatus.getQueryStatus().clear();
        }

        try {
            if (slaveStatus != null) {
                _slaveStatusMap.get(slave.getHostName()).merge(slaveStatus);
            } else {
                _slaveStatusMap.get(slave.getHostName()).clear();
            }
        } catch (CloneNotSupportedException e) {
            // log rather than silently swallow the (unexpected) clone failure
            LOG.error(CCStringUtils.stringifyException(e));
        }

        try {
            potentiallyStartNextQuery();
        } catch (IOException e) {
            LOG.error("Error encountered calling startNextQuery. Exception:" + CCStringUtils.stringifyException(e));
        }
    }

    private void completeContentQuery(AsyncContext<ContentQueryRPCInfo, ContentQueryRPCResult> rpcContext,
            ArcFileItem item) {
        if (item != null) {
            rpcContext.getOutput().setSuccess(true);
            rpcContext.getOutput().setArcFileResult(item);
            rpcContext.setStatus(AsyncRequest.Status.Success);
            try {
                rpcContext.completeRequest();
            } catch (RPCException e) {
                LOG.error(CCStringUtils.stringifyException(e));
            }
        } else {
            rpcContext.getOutput().setSuccess(false);
            rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
            try {
                rpcContext.completeRequest();
            } catch (RPCException e) {
                LOG.error(CCStringUtils.stringifyException(e));
            }
        }
    }

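    // Runs the S3 fetch on the downloader thread pool; ConcurrentTask then
    // marshals the completion callback back onto the event loop thread, where
    // the pending RPC is completed.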
    private void startS3Download(final AsyncContext<ContentQueryRPCInfo, ContentQueryRPCResult> rpcContext,
            final ArchiveInfo archiveInfo) {
        _s3DownloaderThreadPool.submit(new ConcurrentTask<ArcFileItem>(_eventLoop, new Callable<ArcFileItem>() {

            @Override
            public ArcFileItem call() throws Exception {
                LOG.info("Starting S3 Download for URL:" + rpcContext.getInput().getUrl());
                return S3Helper.retrieveArcFileItem(archiveInfo, _eventLoop);
            }

        }, new CompletionCallback<ArcFileItem>() {

            @Override
            public void taskComplete(ArcFileItem loadResult) {
                LOG.info("S3 Download for URL:" + rpcContext.getInput().getUrl() + " Completed with "
                        + ((loadResult == null) ? "NULL" : "Valid") + "load Result");
                completeContentQuery(rpcContext, loadResult);
            }

            @Override
            public void taskFailed(Exception e) {
                LOG.error("S3 Download for URL:" + rpcContext.getInput().getUrl() + " Failed with Exception:"
                        + CCStringUtils.stringifyException(e));
                completeContentQuery(rpcContext, null);
            }

        }));
    }

    @Override
    public void doContentQuery(final AsyncContext<ContentQueryRPCInfo, ContentQueryRPCResult> rpcContext)
            throws RPCException {
        /*    
        LOG.info("Got ContentQuery RPC for URL:" + rpcContext.getInput().getUrl() + "Sending directly to slaves");
            
        final ContentQueryState queryState = new ContentQueryState();
            
        for (QueryServerSlaveState slaveState : _slaves) { 
          if (slaveState.getRemoteStub() != null) {
            queryState.totalDispatchCount++;
            slaveState.getRemoteStub().doMetadataQuery(rpcContext.getInput(),new AsyncRequest.Callback<ContentQueryRPCInfo, CrawlDatumAndMetadata>() {
            
              @Override
              public void requestComplete(final AsyncRequest<ContentQueryRPCInfo, CrawlDatumAndMetadata> request) {
        queryState.completedCount++;
        if (request.getStatus() == AsyncRequest.Status.Success && !queryState.done) {
          queryState.done = true;
          // found a valid result ...
          LOG.info("Found Metadata for URL:" + rpcContext.getInput().getUrl()); 
          // check to see if archive information is available for this url ...
          ArchiveInfo archiveInfo = null;
          if (request.getOutput().getMetadata().getArchiveInfo().size() != 0) { 
            Collections.sort(request.getOutput().getMetadata().getArchiveInfo(), new Comparator<ArchiveInfo> () {
            
              @Override
              public int compare(ArchiveInfo o1, ArchiveInfo o2) {
                return (o1.getArcfileDate() < o2.getArcfileDate()) ? -1 : (o1.getArcfileDate() > o2.getArcfileDate()) ? 1 : 0; 
              } 
            
            });
            archiveInfo = request.getOutput().getMetadata().getArchiveInfo().get(request.getOutput().getMetadata().getArchiveInfo().size()-1);
          }
          // if archive info is available ... 
          if (archiveInfo != null) {
            LOG.info("Archive Info Found for URL:" + rpcContext.getInput().getUrl() + " Starting S3Download");
            // start a download thread ...
            startS3Download(rpcContext,archiveInfo);
          }
          // otherwise ... fail request ... 
          else { 
            LOG.info("Archive Info not Found for URL:" + rpcContext.getInput().getUrl() + " Failing Request");
            completeContentQuery(rpcContext,null);
          }
        }
            
        if (!queryState.done && queryState.completedCount == queryState.totalDispatchCount) { 
          // ok all the queries failed to return results ... fail the request ...
          LOG.info("All Queries Completed and Failed for MetadataQuery for URL:" + rpcContext.getInput().getUrl());
          rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
          try {
            rpcContext.completeRequest();
          } catch (RPCException e) {
            LOG.error(CCStringUtils.stringifyException(e));
          }
        }
              } 
            });
          }
        }
         */
        try {
            rpcContext.completeRequest();
        } catch (RPCException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }

    }

    @Override
    public void IncomingClientConnected(AsyncClientChannel channel) {

    }

    @Override
    public void IncomingClientDisconnected(AsyncClientChannel channel) {

    }

    @Override
    public ArrayList<ShardIndexHostNameTuple> mapShardIdsForIndex(String indexName) throws IOException {
        ArrayList<ShardIndexHostNameTuple> tupleListOut = _masterIndex.mapShardIdsForIndex(indexName);
        if (tupleListOut == null) {
            throw new IOException("Unable to find tupleListMapping for Index:" + indexName);
        }
        return tupleListOut;
    }
}
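
Example

For orientation, here is a minimal caller sketch for the blocking query path above. It is not part of the original source: the concrete Query subclass and the ClientQueryInfo population are supplied elsewhere (none are defined in this file), so read it as an illustration of the BlockingQueryResult contract rather than a definitive API. Note that blockingQueryRequest is package-private, so a real caller would live in the same package, as this sketch does.

package org.commoncrawl.service.queryserver.master;

import org.commoncrawl.rpc.base.shared.RPCStruct;
import org.commoncrawl.service.queryserver.ClientQueryInfo;
import org.commoncrawl.service.queryserver.query.Query;

public class BlockingQueryExample {

    // Sketch only: 'query' would be a concrete Query subclass supplied by the
    // application, and 'queryInfo' would be populated by the front end.
    static <DataType extends RPCStruct, KeyType, ValueType> void runBlocking(MasterServer master,
            Query<DataType, KeyType, ValueType> query, ClientQueryInfo queryInfo) throws Exception {

        // blockingQueryRequest marshals the query onto the event loop and then
        // blocks until the completion callback fires; it returns null if the
        // calling thread is interrupted while waiting
        MasterServer.BlockingQueryResult<KeyType, ValueType> result = master.blockingQueryRequest(query, queryInfo);

        if (result == null) {
            // interrupted while waiting
        } else if (result.querySucceeded) {
            // consume result.resultObject, a QueryResult<KeyType, ValueType>
        } else {
            // inspect result.errorString for the failure reason
        }
    }
}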