org.apache.manifoldcf.crawler.connectors.webcrawler.CookieManager.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.manifoldcf.crawler.connectors.webcrawler.CookieManager.java

Source

/* $Id: CookieManager.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.connectors.webcrawler;

import java.util.*;
import java.io.*;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.authorities.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.CacheKeyFactory;
import org.apache.manifoldcf.crawler.system.ManifoldCF;
import org.apache.manifoldcf.crawler.system.Logging;

import org.apache.http.cookie.Cookie;
import org.apache.http.cookie.ClientCookie;
import org.apache.http.impl.cookie.BasicClientCookie2;

/** This class manages the database table into which we write cookies.  The data resides in the database,
* as well as in cache (up to a certain point).  The result is that there is a memory limited, database-backed repository
* of cookies that we can draw on.
* 
* <br><br>
* <b>cookiedata</b>
* <table border="1" cellpadding="3" cellspacing="0">
* <tr class="TableHeadingColor">
* <th>Field</th><th>Type</th><th>Description&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</th></tr>
* <tr><td>sequencekey</td><td>VARCHAR(255)</td><td></td></tr>
* <tr><td>ordinal</td><td>BIGINT</td><td></td></tr>
* <tr><td>domainspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>domain</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>name</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>value</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>pathspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>path</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>versionspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>version</td><td>BIGINT</td><td></td></tr>
* <tr><td>comment</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>secure</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>expirationdate</td><td>BIGINT</td><td></td></tr>
* <tr><td>discard</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>commenturl</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>portblank</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>portspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>ports</td><td>LONGTEXT</td><td></td></tr>
* </table>
* <br><br>
* 
*/
public class CookieManager extends org.apache.manifoldcf.core.database.BaseTable {
    public static final String _rcsid = "@(#)$Id: CookieManager.java 988245 2010-08-23 18:39:35Z kwright $";

    // Cookies cache class.  Only one needed; it is returned by CookiesDescription.getObjectClass().
    protected static CookiesCacheClass cookiesCacheClass = new CookiesCacheClass();

    // Database fields
    // Session key the cookie row belongs to; rows are deleted and selected by this column.
    protected final static String keyField = "sequencekey";
    // Position of the cookie within its session; reads are ordered by this column.
    protected final static String ordinalField = "ordinal";
    // The rest of these individual fields are here only because the httpclient Cookie class doesn't have a constructor that
    // accepts the string form, so we're forced to keep all the cookie construction arguments individually.
    protected final static String domainSpecifiedField = "domainspecified";
    protected final static String domainField = "domain";
    protected final static String nameField = "name";
    protected final static String valueField = "value";
    protected final static String pathSpecifiedField = "pathspecified";
    protected final static String pathField = "path";
    protected final static String versionSpecifiedField = "versionspecified";
    protected final static String versionField = "version";
    protected final static String commentField = "comment";
    protected final static String secureField = "secure";
    // Stored as the cookie expiry Date's getTime() value.
    protected final static String expirationDateField = "expirationdate";
    protected final static String discardField = "discard";
    protected final static String commentURLField = "commenturl";
    protected final static String portBlankField = "portblank";
    protected final static String portSpecifiedField = "portspecified";
    // Comma-separated list of port numbers (see portsToString/stringToPorts).
    protected final static String portField = "ports";

    // Cache manager.  This handle is set up during the constructor.
    ICacheManager cacheManager;

    /** Constructor.  Binds this manager to the "cookiedata" table and obtains a cache manager handle
    * for the supplied thread context.  Note that one cookiemanager handle is only useful within a specific
    * thread context, so the calling connector object logic must recreate the handle whenever the thread
    * context changes.
    *@param tc is the thread context; it is handed to CacheManagerFactory.make() to obtain the cache manager.
    *@param database is the database handle, passed on to the BaseTable superclass.
    *@throws ManifoldCFException declared by the signature; presumably raised by the superclass constructor
    * or the cache manager factory (not visible from this file).
    */
    public CookieManager(IThreadContext tc, IDBInterface database) throws ManifoldCFException {
        super(database, "cookiedata");
        cacheManager = CacheManagerFactory.make(tc);
    }

    /** Install the manager.
    * Creates the cookiedata table and an index on the session key column, but only when
    * getTableSchema() reports that the table does not exist yet; an existing table is left alone.
    * All the work runs inside a single transaction, which is flagged for rollback whenever
    * anything (checked exception, runtime exception or error) is thrown.
    *@throws ManifoldCFException if one of the database operations fails.
    */
    public void install() throws ManifoldCFException {
        beginTransaction();
        try {
            if (getTableSchema(null, null) != null) {
                // Table already present; nothing to do.  The finally clause still ends the transaction.
                return;
            }

            // Install the table.
            HashMap<String, ColumnDescription> columns = new HashMap<String, ColumnDescription>();
            columns.put(keyField, new ColumnDescription("VARCHAR(255)", false, false, null, null, false));
            columns.put(ordinalField, new ColumnDescription("BIGINT", false, false, null, null, false));
            // The rest of the fields allow us to recreate Cookie objects from the database so we can hand them
            // to httpclient.  (It would be better if we just kept the cookie data around, but that's not how httpclient works.)
            columns.put(domainSpecifiedField, new ColumnDescription("CHAR(1)", false, false, null, null, false));
            columns.put(domainField, new ColumnDescription("LONGTEXT", false, true, null, null, false));
            columns.put(nameField, new ColumnDescription("LONGTEXT", false, true, null, null, false));
            columns.put(valueField, new ColumnDescription("LONGTEXT", false, true, null, null, false));
            columns.put(pathSpecifiedField, new ColumnDescription("CHAR(1)", false, false, null, null, false));
            columns.put(pathField, new ColumnDescription("LONGTEXT", false, true, null, null, false));
            columns.put(versionSpecifiedField, new ColumnDescription("CHAR(1)", false, false, null, null, false));
            columns.put(versionField, new ColumnDescription("BIGINT", false, true, null, null, false));
            columns.put(commentField, new ColumnDescription("LONGTEXT", false, true, null, null, false));
            columns.put(secureField, new ColumnDescription("CHAR(1)", false, false, null, null, false));
            columns.put(expirationDateField, new ColumnDescription("BIGINT", false, true, null, null, false));
            columns.put(discardField, new ColumnDescription("CHAR(1)", false, false, null, null, false));
            columns.put(commentURLField, new ColumnDescription("LONGTEXT", false, true, null, null, false));
            columns.put(portBlankField, new ColumnDescription("CHAR(1)", false, false, null, null, false));
            columns.put(portSpecifiedField, new ColumnDescription("CHAR(1)", false, false, null, null, false));
            columns.put(portField, new ColumnDescription("LONGTEXT", false, true, null, null, false));
            performCreate(columns, null);

            // Create the appropriate indices: all lookups and deletes go by session key.
            ArrayList<String> indexColumns = new ArrayList<String>();
            indexColumns.add(keyField);
            addTableIndex(false, indexColumns);
        } catch (ManifoldCFException e) {
            signalRollback();
            throw e;
        } catch (RuntimeException e) {
            // Unchecked failures must also mark the transaction for rollback; otherwise
            // endTransaction() below would run without the rollback flag being set.
            signalRollback();
            throw e;
        } catch (Error e) {
            signalRollback();
            throw e;
        } finally {
            endTransaction();
        }
    }

    /** Uninstall the manager.
    * Delegates to performDrop() to remove the table this manager was constructed for ("cookiedata").
    * No cache invalidation keys are supplied (the argument is null).
    *@throws ManifoldCFException declared by the signature; presumably raised when the drop fails.
    */
    public void deinstall() throws ManifoldCFException {
        performDrop(null);
    }

    /** Fetch the cookies currently in effect for a session, going through the cache manager.
    * A cached copy is used when the cache manager has one; otherwise the executor's create()
    * method loads the cookies from the database.
    *@param sessionKey is the session key.
    *@return the login cookies object for that session.
    */
    public LoginCookies readCookies(String sessionKey) throws ManifoldCFException {
        // The lookup depends on exactly one cache key: the one derived from the session key.
        StringSetBuffer keyBuffer = new StringSetBuffer();
        keyBuffer.add(getCookiesCacheKey(sessionKey));
        CookiesDescription description = new CookiesDescription(sessionKey, new StringSet(keyBuffer));

        // The executor captures the object matching our description as the cache manager reports it.
        CookiesExecutor executor = new CookiesExecutor(this, description);
        cacheManager.findObjectsAndExecute(new CookiesDescription[] { description }, null, executor,
                getTransactionID());

        // Expiration is in fact done by the web site; the cookies will be updated if necessary.
        return executor.getResults();
    }

    /** Update cookies that are in effect for a given session key.
    * All rows previously stored for the session are deleted and one row per supplied cookie is
    * inserted, in cookie order, inside a single transaction.  The cache key for the session is
    * invalidated as part of the same transaction.  Any exception or error flags the transaction
    * for rollback before it is rethrown.
    *@param sessionKey is the session key.
    *@param cookies are the cookies to write into the database.
    *@throws ManifoldCFException if a database or cache operation fails.
    */
    public void updateCookies(String sessionKey, LoginCookies cookies) throws ManifoldCFException {
        StringSetBuffer ssb = new StringSetBuffer();
        ssb.add(getCookiesCacheKey(sessionKey));
        StringSet cacheKeys = new StringSet(ssb);
        ICacheHandle ch = cacheManager.enterCache(null, cacheKeys, getTransactionID());
        try {
            beginTransaction();
            try {
                // Delete any old cookies, and create new ones
                ArrayList<Object> deleteParams = new ArrayList<Object>();
                deleteParams.add(sessionKey);
                performDelete("WHERE " + keyField + "=?", deleteParams, null);

                // Now, insert the new cookies; the loop index doubles as the stored ordinal.
                for (int i = 0; i < cookies.getCookieCount(); i++) {
                    performInsert(cookieToRow(sessionKey, i, cookies.getCookie(i)), null);
                }

                cacheManager.invalidateKeys(ch);
            } catch (ManifoldCFException e) {
                signalRollback();
                throw e;
            } catch (RuntimeException e) {
                // Without this, an unchecked failure between the delete and the last insert
                // would reach endTransaction() without the rollback flag being set.
                signalRollback();
                throw e;
            } catch (Error e) {
                signalRollback();
                throw e;
            } finally {
                endTransaction();
            }
        } finally {
            cacheManager.leaveCache(ch);
        }
    }

    /** Build the column map for one cookie row.
    * String attributes that are null or empty are left out of the map, as is a null expiry date.
    *@param sessionKey is the session key the row belongs to.
    *@param ordinal is the position of the cookie within the session.
    *@param c is the cookie to store.
    *@return the column-name to value map to insert.
    */
    private static HashMap<String, Object> cookieToRow(String sessionKey, int ordinal, Cookie c) {
        HashMap<String, Object> row = new HashMap<String, Object>();
        row.put(keyField, sessionKey);
        row.put(ordinalField, Long.valueOf(ordinal));

        String domain = c.getDomain();
        boolean hasDomain = domain != null && domain.length() > 0;
        if (hasDomain) {
            row.put(domainField, domain);
        }
        row.put(domainSpecifiedField, booleanToString(hasDomain));

        String name = c.getName();
        if (name != null && name.length() > 0) {
            row.put(nameField, name);
        }
        String value = c.getValue();
        if (value != null && value.length() > 0) {
            row.put(valueField, value);
        }

        String path = c.getPath();
        boolean hasPath = path != null && path.length() > 0;
        if (hasPath) {
            row.put(pathField, path);
        }
        row.put(pathSpecifiedField, booleanToString(hasPath));

        row.put(versionField, Long.valueOf(c.getVersion()));
        // Make something up.  It may not be correct, but there's really no choice.
        row.put(versionSpecifiedField, booleanToString(true));

        String comment = c.getComment();
        if (comment != null && comment.length() > 0) {
            row.put(commentField, comment);
        }
        row.put(secureField, booleanToString(c.isSecure()));

        Date expirationDate = c.getExpiryDate();
        if (expirationDate != null) {
            row.put(expirationDateField, Long.valueOf(expirationDate.getTime()));
        }
        // The discard flag is always stored as false; it is not derived from c.isPersistent().
        row.put(discardField, booleanToString(false));

        String commentURL = c.getCommentURL();
        if (commentURL != null && commentURL.length() > 0) {
            row.put(commentURLField, commentURL);
        }

        int[] ports = c.getPorts();
        boolean hasPorts = ports != null && ports.length > 0;
        if (hasPorts) {
            row.put(portField, portsToString(ports));
        }
        row.put(portBlankField, booleanToString(!hasPorts));
        row.put(portSpecifiedField, booleanToString(hasPorts));
        return row;
    }

    // Protected methods and classes

    /** Build the global cache key that stands for one session's cookies.
    * The key is the fixed prefix "COOKIES_" followed by the session key.
    *@param sessionKey is the session key.
    *@return the cache key.
    */
    protected static String getCookiesCacheKey(String sessionKey) {
        StringBuilder key = new StringBuilder("COOKIES_");
        key.append(sessionKey);
        return key.toString();
    }

    /** Load a session's cookies straight from the database, bypassing the cache.
    * Rows are fetched in ascending ordinal order and each one is turned back into a cookie.
    *@param sessionKey is the session key.
    *@return the login cookies object (empty when the session has no rows).
    */
    protected LoginCookies readCookiesUncached(String sessionKey) throws ManifoldCFException {
        ArrayList<Object> queryParams = new ArrayList<Object>();
        queryParams.add(sessionKey);
        String query = "SELECT * FROM " + getTableName() + " WHERE " + keyField + "=? ORDER BY " + ordinalField
                + " ASC";
        IResultSet rows = performQuery(query, queryParams, null, null);

        DynamicCookieSet cookieSet = new DynamicCookieSet();
        for (int rowIndex = 0; rowIndex < rows.getRowCount(); rowIndex++) {
            cookieSet.addCookie(rowToCookie(rows.getRow(rowIndex)));
        }
        return cookieSet;
    }

    /** Rebuild one cookie from a cookiedata row.
    * Only name, value, domain, path, version, comment, secure, expiration date, discard,
    * comment URL and ports are applied to the cookie.  The domainspecified, pathspecified,
    * versionspecified, portblank and portspecified columns are not read back.
    *@param row is the database row.
    *@return the reconstructed cookie.
    */
    private static BasicClientCookie2 rowToCookie(IResultRow row) throws ManifoldCFException {
        String cookieName = (String) row.getValue(nameField);
        String cookieValue = (String) row.getValue(valueField);
        BasicClientCookie2 cookie = new BasicClientCookie2(cookieName, cookieValue);

        String domain = (String) row.getValue(domainField);
        if (domain != null && domain.length() > 0) {
            cookie.setDomain(domain);
        }

        String path = (String) row.getValue(pathField);
        if (path != null && path.length() > 0) {
            cookie.setPath(path);
        }

        Long version = (Long) row.getValue(versionField);
        if (version != null) {
            cookie.setVersion(version.intValue());
        }

        String comment = (String) row.getValue(commentField);
        if (comment != null) {
            cookie.setComment(comment);
        }

        cookie.setSecure(stringToBoolean((String) row.getValue(secureField)));

        Long expiration = (Long) row.getValue(expirationDateField);
        if (expiration != null) {
            cookie.setExpiryDate(new Date(expiration.longValue()));
        }

        cookie.setDiscard(stringToBoolean((String) row.getValue(discardField)));

        String commentURL = (String) row.getValue(commentURLField);
        if (commentURL != null && commentURL.length() > 0) {
            cookie.setCommentURL(commentURL);
        }

        // Ports are comma-separated
        String portList = (String) row.getValue(portField);
        if (portList != null && portList.length() > 0) {
            cookie.setPorts(stringToPorts(portList));
        }
        return cookie;
    }

    /** Convert a boolean string to a boolean.
    *@param value is the stored flag, expected to be "T" or "F".
    *@return true for "T", false for "F".
    *@throws ManifoldCFException if the value is anything else, including null.
    */
    protected static boolean stringToBoolean(String value) throws ManifoldCFException {
        // Constant-first comparisons: a null value falls through to the ManifoldCFException
        // below instead of failing with a NullPointerException.
        if ("T".equals(value)) {
            return true;
        }
        if ("F".equals(value)) {
            return false;
        }
        throw new ManifoldCFException("Expected T or F but saw " + value);
    }

    /** Encode a boolean as the single-character flag stored in the database.
    *@param value is the boolean to encode.
    *@return "T" for true, "F" for false.
    */
    protected static String booleanToString(boolean value) {
        return value ? "T" : "F";
    }

    /** Parse a comma-separated list of port numbers.
    *@param value is the comma-separated port list.
    *@return the ports, in the order they appear in the string.
    *@throws ManifoldCFException if any element is not a valid integer; the
    * NumberFormatException is kept as the cause.
    */
    protected static int[] stringToPorts(String value) throws ManifoldCFException {
        String[] tokens = value.split(",");
        int[] parsed = new int[tokens.length];
        try {
            for (int index = 0; index < tokens.length; index++) {
                parsed[index] = Integer.parseInt(tokens[index]);
            }
        } catch (NumberFormatException e) {
            throw new ManifoldCFException(e.getMessage(), e);
        }
        return parsed;
    }

    /** Render a port array as a comma-separated string (no spaces).
    *@param ports is the array of port numbers.
    *@return the joined string; empty when the array has no elements.
    */
    protected static String portsToString(int[] ports) {
        StringBuilder joined = new StringBuilder();
        String separator = "";
        for (int port : ports) {
            joined.append(separator).append(port);
            // Every element after the first is preceded by a comma.
            separator = ",";
        }
        return joined.toString();
    }

    /** A LoginCookies implementation that is filled in one cookie at a time.
    * Cookies are kept in insertion order and addressed by index.
    */
    protected static class DynamicCookieSet implements LoginCookies {
        /** The cookies added so far, in insertion order. */
        protected List<Cookie> cookies;

        /** Create an empty cookie set. */
        public DynamicCookieSet() {
            this.cookies = new ArrayList<Cookie>();
        }

        /** Append a cookie to the end of the set.
        *@param c is the cookie to add.
        */
        public void addCookie(Cookie c) {
            this.cookies.add(c);
        }

        /** @return the number of cookies in the set. */
        public int getCookieCount() {
            return this.cookies.size();
        }

        /** Get a cookie by position.
        *@param index is the zero-based position of the cookie.
        *@return the cookie at that position.
        */
        public Cookie getCookie(int index) {
            return this.cookies.get(index);
        }
    }

    /** Cache object description for one session's cookies.
    * Instances serve as the lookup key for cached data; two descriptions are equal
    * exactly when their session keys are equal.
    */
    protected static class CookiesDescription extends org.apache.manifoldcf.core.cachemanager.BaseDescription {
        protected String sessionKey;
        protected String criticalSectionName;
        protected StringSet cacheKeys;

        /** Constructor.
        *@param sessionKey is the session key this description stands for.
        *@param invKeys are the cache keys reported by getObjectKeys().
        */
        public CookiesDescription(String sessionKey, StringSet invKeys) {
            super("cookiescache");
            this.sessionKey = sessionKey;
            this.cacheKeys = invKeys;
            // The critical section name combines the runtime class name and the session key.
            this.criticalSectionName = getClass().getName() + "-" + sessionKey;
        }

        /** @return the session key this description was built for. */
        public String getSessionKey() {
            return this.sessionKey;
        }

        /** Hash on the session key, consistent with equals(). */
        public int hashCode() {
            return this.sessionKey.hashCode();
        }

        /** Equal when the other object is a CookiesDescription with an equal session key. */
        public boolean equals(Object o) {
            if (o instanceof CookiesDescription) {
                return ((CookiesDescription) o).sessionKey.equals(sessionKey);
            }
            return false;
        }

        /** @return the critical section name computed in the constructor. */
        public String getCriticalSectionName() {
            return this.criticalSectionName;
        }

        /** Get the cache keys for an object (which may or may not exist yet in
        * the cache).  This method is called in order for cache manager to throw the correct locks.
        * @return the object's cache keys, or null if the object should not
        * be cached.
        */
        public StringSet getObjectKeys() {
            return this.cacheKeys;
        }

        /** Get the object class for an object.  The object class is used to determine
        * the group of objects treated in the same LRU manner.
        * @return the shared cookies cache class.
        */
        public ICacheClass getObjectClass() {
            return cookiesCacheClass;
        }
    }

    /** Cache class for cookies.
    * An instance of this class describes the cache class for cookie caching.  There's
    * only ever a need for one, so that will be created statically.
    */
    protected static class CookiesCacheClass implements ICacheClass {
        /** Name of the single LRU pool all cookie objects share. */
        private static final String CLASS_NAME = "COOKIESCLASS";
        /** Hardwired for the moment; this many cookies records will be cached, and no more. */
        private static final int MAX_LRU_COUNT = 2000;

        /** Get the name of the object class.
        * This determines the set of objects that are treated in the same
        * LRU pool.
        *@return the class name, a constant since all cookies are counted together.
        */
        public String getClassName() {
            return CLASS_NAME;
        }

        /** Get the maximum LRU count of the object class.
        *@return the maximum number of the objects of the particular class
        * allowed.
        */
        public int getMaxLRUCount() {
            return MAX_LRU_COUNT;
        }

    }

    /** Executor used by the cache manager to locate cookies session objects.
    * It supplies the operations the cache manager needs to rebuild objects that are
    * not in the cache at the moment, and records the object matching the description
    * it was constructed with.
    */
    protected static class CookiesExecutor extends org.apache.manifoldcf.core.cachemanager.ExecutorBase {
        // Member variables
        protected CookieManager thisManager;
        protected LoginCookies returnValue;
        protected CookiesDescription thisDescription;

        /** Constructor.
        *@param manager is the CookieManager class instance used to read uncached cookies.
        *@param objectDescription is the desired object description.
        */
        public CookiesExecutor(CookieManager manager, CookiesDescription objectDescription) {
            super();
            this.thisManager = manager;
            this.thisDescription = objectDescription;
            this.returnValue = null;
        }

        /** Get the result.
        *@return the looked-up or read cached instance, or null if none was reported via exists().
        */
        public LoginCookies getResults() {
            return this.returnValue;
        }

        /** Create a set of new objects to operate on and cache.  This method is called only
        * if the specified object(s) are NOT available in the cache.  The specified objects
        * should be created and returned; if they are not created, it means that the
        * execution cannot proceed, and the execute() method will not be called.
        * @param objectDescriptions is the set of unique identifier of the object.
        * @return the newly created objects to cache, or null, if any object cannot be created.
        *  The order of the returned objects must correspond to the order of the object descriptions.
        */
        public Object[] create(ICacheDescription[] objectDescriptions) throws ManifoldCFException {
            // Multiple values are not expected to be requested, so each description is
            // resolved with its own database read.
            LoginCookies[] created = new LoginCookies[objectDescriptions.length];
            for (int index = 0; index < created.length; index++) {
                CookiesDescription description = (CookiesDescription) objectDescriptions[index];
                created[index] = thisManager.readCookiesUncached(description.getSessionKey());
            }
            return created;
        }

        /** Notify the implementing class of the existence of a cached version of the
        * object.  The object is passed to this method so that the execute() method below
        * will have it available to operate on.  This method is also called for all objects
        * that are freshly created as well.
        * @param objectDescription is the unique identifier of the object.
        * @param cachedObject is the cached object.
        */
        public void exists(ICacheDescription objectDescription, Object cachedObject) throws ManifoldCFException {
            // Cast what came in as what it really is
            CookiesDescription incomingDescription = (CookiesDescription) objectDescription;
            LoginCookies incomingCookies = (LoginCookies) cachedObject;
            // Only the object for the description we were constructed with is kept as the result.
            if (incomingDescription.equals(thisDescription)) {
                this.returnValue = incomingCookies;
            }
        }

        /** Perform the desired operation.  This method is called after either create()
        * or exists() is called for every requested object.
        */
        public void execute() throws ManifoldCFException {
            // Does nothing; we only want to fetch objects in this cacher.
        }
    }

}