com.silverwrist.venice.std.TrackbackManager.java Source code

Introduction

Here is the source code for com.silverwrist.venice.std.TrackbackManager.java
Source

/*
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
 * 
 * Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied. See the License for the specific
 * language governing rights and limitations under the License.
 * 
 * The Original Code is the Venice Web Communities System.
 * 
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@users.sf.net>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 * Copyright (C) 2004-2006 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 * 
 * Contributor(s): 
 */
package com.silverwrist.venice.std;

import java.io.*;
import java.net.*;
import java.text.*;
import java.util.*;
import javax.mail.internet.ContentType;
import javax.xml.parsers.*;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.httpclient.util.*;
import org.apache.log4j.Logger;
import org.apache.regexp.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import com.silverwrist.util.*;
import com.silverwrist.venice.except.TrackbackException;

/**
 * A Singleton class which implements the management of trackback items.  The implementation conforms to
 * the Movable Type TrackBack Protocol, Version 1.1.
 *
 * @author Eric J. Bowersox &lt;erbo@users.sf.net&gt;
 * @version $Id: TrackbackManager.java,v 1.1 2004/12/30 05:55:01 erbo Exp $
 */
public class TrackbackManager {
    /*--------------------------------------------------------------------------------
     * Internal class which caches information about a particular page
     *--------------------------------------------------------------------------------
     */

    /**
     * Internal class which caches information about a particular page read via HTTP.
     *
     * @author Eric J. Bowersox &lt;erbo@users.sf.net&gt;
     * @version $Id: TrackbackManager.java,v 1.1 2004/12/30 05:55:01 erbo Exp $
     */
    private class PageAttributes {
        /*====================================================================
         * Attributes
         *====================================================================
         */

        /** Last modification date for this page, or <code>null</code> if none is known. */
        private java.util.Date m_last_modified = null;

        /**
         * Expiration time for the page, in milliseconds since the epoch.  The value 0 is reserved
         * to mean "no expiration time known"; see {@link #expire()}.
         */
        private long m_expires = 0;

        /** Allow reset of expire time from page metadata if this flag is <code>true</code>. */
        private boolean m_expire_meta = true;

        /** List of all trackback items that were loaded on this page. */
        private final LinkedList m_tb_items = new LinkedList();

        /*====================================================================
         * Constructor
         *====================================================================
         */

        /**
         * Creates a new <code>PageAttributes</code> instance from the headers of an HTTP response.
         * The expiration time is taken from the <code>Cache-Control</code> header (s-maxage first,
         * then max-age) and, failing that, from the <code>Expires</code> header.
         *
         * @param response {@link org.apache.commons.httpclient.HttpMethod HttpMethod} containing response headers.
         */
        PageAttributes(HttpMethod response) {
            // get the Last-Modified date from the response; an unparseable date is treated as absent
            Header hdr = response.getResponseHeader("Last-Modified");
            m_last_modified = parseDate((hdr != null) ? hdr.getValue() : null, "Last-Modified");

            boolean look_for_expire = true;
            hdr = response.getResponseHeader("Cache-Control");
            if (hdr != null) { // first, try to match s-maxage
                String s = null;
                RE m = new RE(s_cache_smaxage, RE.MATCH_CASEINDEPENDENT);
                if (m.match(hdr.getValue()))
                    s = m.getParen(1);
                else { // try to match max-age
                    m = new RE(s_cache_maxage, RE.MATCH_CASEINDEPENDENT);
                    if (m.match(hdr.getValue()))
                        s = m.getParen(1);

                } // end else

                if (s != null) { // parse the delta-seconds value and create an expiration time
                    try { // the value comes from a remote server, so it may not fit in a long
                        long delta = Long.parseLong(s);
                        if (delta < 0)
                            delta = 0; // a negative age is treated as "expires now"
                        long now = System.currentTimeMillis();
                        if (delta > ((Long.MAX_VALUE - now) / 1000L))
                            m_expires = Long.MAX_VALUE; // clamp rather than overflow into the past
                        else
                            m_expires = now + (delta * 1000L);
                        m_expire_meta = false;
                        look_for_expire = false;

                    } // end try
                    catch (NumberFormatException e) { // bogus value: fall back to the Expires header
                        logger.debug("failed to parse Cache-Control age value \"" + s + "\"", e);

                    } // end catch

                } // end if

            } // end if

            if (look_for_expire) { // OK, didn't find it there, try the Expires header
                hdr = response.getResponseHeader("Expires");
                java.util.Date d = parseDate((hdr != null) ? hdr.getValue() : null, "Expires");
                if (d != null)
                    setExpires(d);

            } // end if

        } // end constructor

        /*====================================================================
         * Internal operations
         *====================================================================
         */

        /**
         * Parses an HTTP date string, logging (at debug level) and swallowing parse failures.
         *
         * @param value The date string to parse; may be <code>null</code>.
         * @param what Name of the field being parsed, used only in the log message.
         * @return The parsed date, or <code>null</code> if <code>value</code> was <code>null</code> or
         *         could not be parsed.
         */
        private java.util.Date parseDate(String value, String what) {
            if (value == null)
                return null;
            try { // attempt to parse this date
                return DateUtil.parseDate(value);

            } // end try
            catch (DateParseException e) { // parse failed
                logger.debug("failed to parse " + what + " date", e);
                return null;

            } // end catch

        } // end parseDate

        /**
         * Records an absolute expiration date.  A date falling exactly on the epoch would be stored
         * as 0, which this class uses to mean "no expiration known"; it is nudged to 1 ms so that
         * such a page is treated as already expired instead of being cached forever.
         *
         * @param d The expiration date; must not be <code>null</code>.
         */
        private void setExpires(java.util.Date d) {
            long t = d.getTime();
            m_expires = (t == 0L) ? 1L : t;

        } // end setExpires

        /*====================================================================
         * Public getters/setters
         *====================================================================
         */

        /**
         * Returns the last modification date for the given page.
         *
         * @return The last modification date, or <code>null</code> if none is known.
         */
        java.util.Date getLastModified() {
            return m_last_modified;

        } // end getLastModified

        /*====================================================================
         * External operations
         *====================================================================
         */

        /**
         * Adds a new {@link com.silverwrist.venice.std.TrackbackItem TrackbackItem} to the list managed
         * by this attributes object.
         *
         * @param tb The <code>TrackbackItem</code> to be added.
         */
        void addItem(TrackbackItem tb) {
            m_tb_items.add(tb);

        } // end addItem

        /**
         * Update the data in this <code>PageAttributes</code> object from the &lt;META HTTP-EQUIV=...&gt; tags
         * in the actual page.  The expiration time is only updated if it was not already fixed by a
         * <code>Cache-Control</code> header in the HTTP response.
         *
         * @param page_attrs The {@link java.util.Map Map} containing the &lt;META HTTP-EQUIV=...&gt; data to use,
         *                   keyed by upper-case HTTP-EQUIV name.
         */
        void updateFromPage(Map page_attrs) {
            java.util.Date d = parseDate((String) (page_attrs.get("LAST-MODIFIED")), "Last-Modified");
            if (d != null) // update last modified date
                m_last_modified = d;

            if (m_expire_meta) { // want to look for an expiration date as well
                d = parseDate((String) (page_attrs.get("EXPIRES")), "Expires");
                if (d != null) // update expiration date
                    setExpires(d);

            } // end if

        } // end updateFromPage

        /**
         * Check to see if this entry has expired, and, if so, remove all mention of it from the item and
         * page caches.
         *
         * @return <code>true</code> if the item has expired, <code>false</code> if not.
         */
        boolean expire() {
            if (m_expires == 0)
                return false; // no expiration time known
            if (m_expires >= System.currentTimeMillis())
                return false; // not yet expired
            synchronized (TrackbackManager.this) { // need to frob m_item_cache and m_page_cache here...
                for (Iterator it = m_tb_items.iterator(); it.hasNext();) { // remove all items depending on this PageAttributes
                    TrackbackItem item = (TrackbackItem) (it.next());
                    m_item_cache.remove(item.getItem());

                } // end for

                // purge every page cache entry whose value is this object (identity comparison)
                for (Iterator it = m_page_cache.values().iterator(); it.hasNext();) {
                    if (it.next() == this)
                        it.remove();

                } // end for

            } // end synchronized block

            return true;

        } // end expire

    } // end class PageAttributes

    /*--------------------------------------------------------------------------------
     * Internal implementation of TrackbackItem
     *--------------------------------------------------------------------------------
     */

    /**
     * Internal implementation of {@link com.silverwrist.venice.std.TrackbackItem TrackbackItem}.
     *
     * @author Eric J. Bowersox &lt;erbo@users.sf.net&gt;
     * @version $Id: TrackbackManager.java,v 1.1 2004/12/30 05:55:01 erbo Exp $
     */
    private class MyTrackbackItem extends TrackbackItem {
        /*====================================================================
         * Attributes
         *====================================================================
         */

        /**
         * The attributes for the page we found this trackback item on.
         */
        private final PageAttributes m_attrs;

        /*====================================================================
         * Constructor
         *====================================================================
         */

        /**
         * Constructs a new instance of <code>MyTrackbackItem</code> and registers it with the
         * given page attributes.
         *
         * @param item The URL of the trackback item.
         * @param trackback The "trackback ping URL" for this particular trackback item.
         * @param title The title associated with this trackback item.
         * @param attrs The page attributes for the page we found the item on.
         */
        MyTrackbackItem(URL item, URL trackback, String title, PageAttributes attrs) {
            super(item, trackback, title);
            m_attrs = attrs;
            m_attrs.addItem(this);

        } // end constructor

        /*====================================================================
         * Abstract implementations from class TrackbackItem
         *====================================================================
         */

        /**
         * Send a trackback ping to this trackback URL and interpret the XML response.
         *
         * @param url The URL of the trackback item to send to.
         * @param parms The parameters to pass to the trackback operation.
         * @exception com.silverwrist.venice.except.TrackbackException If the trackback could not be sent,
         *            the server returned a status other than 200, or the response was malformed or
         *            reported an error.
         */
        protected void sendPing(String url, NameValuePair[] parms) throws TrackbackException {
            // Construct the PostMethod and fill in the parameters.
            // NOTE: setFollowRedirects(true) is deliberately NOT called here.  HttpClient 3.x
            // rejects it on entity-enclosing methods such as POST with an IllegalArgumentException,
            // which made every ping fail.  A redirect response is reported below as a non-200 status.
            PostMethod meth = new PostMethod(url);
            meth.setDoAuthentication(false);
            meth.setRequestHeader("User-Agent", USER_AGENT);
            meth.setRequestBody(parms);

            synchronized (TrackbackManager.this) { // since we use the HTTP client and the parser...
                // true while a response body is pending that we have not yet started to consume
                boolean get_resp = false;
                try { // execute the method!
                    int rc = m_http_client.executeMethod(meth);
                    get_resp = true;
                    if (rc != HttpStatus.SC_OK) // this is farked!
                        throw new TrackbackException("POST to " + url + " returned " + rc);

                    // The response from the method will be XML.  Parse it.
                    InputStream istm = meth.getResponseBodyAsStream();
                    get_resp = false; // the parser consumes the body from here on
                    Document doc = m_tbresp_parser.parse(istm);

                    // probe the returned XML tree
                    Element root = doc.getDocumentElement();
                    if (!(root.getTagName().equals("response")))
                        throw new TrackbackException("trackback response malformed - expected <response/> element");
                    DOMElementHelper h = new DOMElementHelper(root);
                    String s = h.getSubElementText("error");
                    if (s == null)
                        throw new TrackbackException(
                                "trackback response malformed - expected <error/> subelement of <response/>");
                    if (s.equals("1")) { // error - get message and throw it
                        s = h.getSubElementText("message");
                        if (s != null)
                            throw new TrackbackException("Trackback ping failed: " + s);
                        else
                            throw new TrackbackException("Trackback ping failed with no response given");

                    } // end if
                    else if (!(s.equals("0")))
                        throw new TrackbackException(
                                "trackback response malformed - expected <error/> value of either 0 or 1");
                    // else we were successful

                } // end try
                catch (IOException e) { // catch any I/O errors from executing the method
                    throw new TrackbackException("I/O error sending trackback ping: " + e.getMessage(), e);

                } // end catch
                catch (SAXException e) { // XML parser screwed up
                    throw new TrackbackException("trackback response malformed - not valid XML", e);

                } // end catch
                finally { // release the connection if possible
                    try { // drain any unread response body before releasing the connection
                        if (get_resp)
                            meth.getResponseBody();

                    } // end try
                    catch (IOException e) { // best effort only; we are already on the way out
                    } // end catch

                    meth.releaseConnection();

                } // end finally

            } // end synchronized block

        } // end sendPing

        /*====================================================================
         * Public getters/setters
         *====================================================================
         */

        /**
         * Return the attributes from the page from which this trackback item was loaded.
         *
         * @return See above.
         */
        PageAttributes getAttributes() {
            return m_attrs;

        } // end getAttributes

        /*====================================================================
         * External operations
         *====================================================================
         */

        /**
         * Check to see if the page from which this trackback item was loaded has expired, and, if so, remove
         * all mention of it from the item and page caches.
         *
         * @return <code>true</code> if the page from which this trackback item was loaded has expired,
         *         <code>false</code> if not.
         */
        boolean expire() {
            return m_attrs.expire();

        } // end expire

    } // end class MyTrackbackItem

    /*--------------------------------------------------------------------------------
     * Static data members
     *--------------------------------------------------------------------------------
     */

    /** The instance of {@link org.apache.log4j.Logger Logger} for this class's use. */
    private static Logger logger = Logger.getLogger(TrackbackManager.class);

    /** The Singleton instance of this class. */
    private static TrackbackManager s_self = null;

    /** The User-Agent string sent with every HTTP request this class makes. */
    private static final String USER_AGENT = "Venice Web Communities System/0.01 (TrackbackManager)";

    /** The XML namespace for RDF elements. */
    private static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

    /** The XML namespace for Dublin Core elements. */
    private static final String NS_DC = "http://purl.org/dc/elements/1.1/";

    /** The XML namespace for trackback elements. */
    private static final String NS_TRACKBACK = "http://madskills.com/public/xml/rss/module/trackback/";

    /**
     * The date format to format output HTTP dates.  Assigned outside this declaration
     * (presumably in a static initializer elsewhere in the file).
     */
    private static final DateFormat s_httpdate_format;

    /** The regular expression compiler we use. */
    private static final RECompiler COMPILER = new RECompiler();

    // NOTE: the REProgram fields below are declared null here; they must be compiled and assigned
    // elsewhere in the file before use (not visible in this section).

    /** Regular expression program to recognize the s-maxage directive in a Cache-Control header. */
    private static REProgram s_cache_smaxage = null;

    /** Regular expression program to recognize the max-age directive in a Cache-Control header. */
    private static REProgram s_cache_maxage = null;

    /** Regular expression program for recognizing META tags. */
    private static REProgram s_meta_tag = null;

    /** Regular expression program for recognizing HTTP-EQUIV attributes. */
    private static REProgram s_http_equiv_attr = null;

    /** Regular expression program for recognizing CONTENT attributes. */
    private static REProgram s_content_attr = null;

    /** Regular expression program for recognizing the start of an RDF block. */
    private static REProgram s_rdf_start = null;

    /*--------------------------------------------------------------------------------
     * Attributes
     *--------------------------------------------------------------------------------
     */

    /**
     * Map from page {@link java.net.URL URL}s to <code>PageAttributes</code> objects.
     * Modified while holding this object's monitor.
     */
    private final Map m_page_cache;

    /**
     * Map from trackback item {@link java.net.URL URL}s to <code>MyTrackbackItem</code> objects.
     * Modified while holding this object's monitor.
     */
    private final Map m_item_cache;

    /**
     * Map from namespace names to end recognizer {@link org.apache.regexp.REProgram REProgram}s.
     * Populated lazily by <code>getEndRecognizer</code>.
     */
    private final Map m_end_recognizers;

    /**
     * The {@link org.apache.commons.httpclient.HttpClient HttpClient} to use to retrieve pages
     * and send pings.  Used while holding this object's monitor.
     */
    private final HttpClient m_http_client;

    /**
     * The instance of the XML parser we use to parse RDF blocks.  Remains <code>null</code> if
     * parser creation failed in the constructor.
     */
    private DocumentBuilder m_rdf_parser = null;

    /**
     * The instance of the XML parser we use to parse trackback responses.  Remains <code>null</code> if
     * parser creation failed in the constructor.
     */
    private DocumentBuilder m_tbresp_parser = null;

    /*--------------------------------------------------------------------------------
     * Constructor
     *--------------------------------------------------------------------------------
     */

    /**
     * Only one instance of this class can/should exist.
     */
    private TrackbackManager() {
        m_page_cache = new HashMap();
        m_item_cache = new HashMap();
        m_end_recognizers = new HashMap();
        m_http_client = new HttpClient();

        try { // create the XML parsers we use
            DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
            fact.setCoalescing(true);
            fact.setExpandEntityReferences(true);
            fact.setIgnoringComments(true);
            fact.setNamespaceAware(true);
            fact.setValidating(false);
            m_rdf_parser = fact.newDocumentBuilder();
            fact.setCoalescing(true);
            fact.setExpandEntityReferences(true);
            fact.setIgnoringComments(true);
            fact.setNamespaceAware(false);
            fact.setValidating(false);
            m_tbresp_parser = fact.newDocumentBuilder();

        } // end try
        catch (ParserConfigurationException e) { // this is bad!
            logger.fatal("XML parser creation failed", e);

        } // end catch

    } // end constructor

    /*--------------------------------------------------------------------------------
     * Internal operations
     *--------------------------------------------------------------------------------
     */

    /**
     * Extracts an attribute value from the start of the string.  The attribute value may be enclosed
     * in quotes, or may simply be a series of nonblank characters delimited by blanks.
     *
     * @param s The string to extract the attribute value from.
     * @return The attribute value extracted.
     */
    private static final String extractAttribute(String s) {
        // Extracts one attribute value from the start of s.  Leading whitespace is skipped.
        // A value starting with ' or " runs to the matching closing quote; if that quote is
        // missing, or s holds only whitespace, "" is returned.  An unquoted value runs to the
        // next whitespace character, the next '>' or the end of the string.
        final int len = s.length();

        int st = 0;
        while ((st < len) && Character.isWhitespace(s.charAt(st)))
            st++;
        if (st == len)
            return "";

        char first = s.charAt(st);
        if ((first == '\'') || (first == '\"')) { // find quoted string boundaries
            int close = s.indexOf(first, st + 1);
            if (close < 0)
                return ""; // unterminated quoted value
            return s.substring(st + 1, close);

        } // end if

        // Unquoted value.  The caller passes the tail of a complete tag, so the closing '>' of
        // the tag may directly follow the value; it is never part of an unquoted attribute value.
        int end = st;
        while ((end < len) && !(Character.isWhitespace(s.charAt(end))) && (s.charAt(end) != '>'))
            end++;
        return s.substring(st, end);

    } // end extractAttribute

    /**
     * Extracts all <META HTTP-EQUIV=...> tags from the given page content and returns them
     * as a {@link java.util.Map Map}.
     *
     * @param content The content of the page to scan.
     * @return A non-modifiable <code>Map</code> containing all the <META HTTP-EQUIV=...> tags from the page.
     *         The keys are the HTTP-EQUIV names, converted to upper-case.  The values are the CONTENT values.
     */
    private static final Map extractHttpEquivTags(String content) {
        // Scans content for META tags carrying both an HTTP-EQUIV and a CONTENT attribute and
        // collects them into a Map from upper-cased HTTP-EQUIV name to CONTENT value.  Values of
        // repeated names are joined with ", ".  The result is unmodifiable.
        Map rc = new HashMap();

        // the matchers do not depend on the loop, so build them once
        RE meta = new RE(s_meta_tag, RE.MATCH_CASEINDEPENDENT);
        RE equiv = new RE(s_http_equiv_attr, RE.MATCH_CASEINDEPENDENT);
        RE cont = new RE(s_content_attr, RE.MATCH_CASEINDEPENDENT);

        int pos = 0;
        while (meta.match(content, pos)) { // get the meta tag itself
            String tag = meta.getParen(0);
            if (equiv.match(tag)) { // extract the HTTP-EQUIV name
                // Upper-case with a fixed locale: callers look up ASCII keys such as
                // "LAST-MODIFIED" and "EXPIRES", and the default-locale conversion would map
                // 'i' to a dotted capital I under a Turkish locale, breaking those lookups.
                String name = extractAttribute(tag.substring(equiv.getParenEnd(0))).toUpperCase(Locale.ENGLISH);
                if (cont.match(tag)) { // extract the CONTENT value
                    String val = extractAttribute(tag.substring(cont.getParenEnd(0)));

                    // put the value into the return Map, appending to any earlier value
                    String exist = (String) (rc.get(name));
                    if (exist == null)
                        rc.put(name, val);
                    else
                        rc.put(name, exist + ", " + val);

                } // end if
                  // else ignore this one

            } // end if
              // else ignore this one

            // resume scanning just past this tag
            pos = meta.getParenEnd(0);

        } // end while

        if (rc.isEmpty())
            return Collections.EMPTY_MAP;
        else
            return Collections.unmodifiableMap(rc);

    } // end extractHttpEquivTags

    /**
     * Given a specific namespace tag, returns a {@link org.apache.regexp.REProgram REProgram} which recognizes
     * the end-RDF tag.
     *
     * @param rdfns The namespace tag to use.
     * @return The correct <code>REProgram</code>.
     */
    private synchronized REProgram getEndRecognizer(String rdfns) {
        // Returns the compiled pattern matching the closing tag "</prefix:RDF>" for the given
        // namespace prefix, compiling and caching it on first use.  Returns null if the
        // pattern fails to compile.
        REProgram cached = (REProgram) (m_end_recognizers.get(rdfns));
        if (cached != null)
            return cached; // already compiled for this prefix

        // build the pattern text, escaping any "." in the prefix so it matches literally
        StringBuffer pattern = new StringBuffer("</");
        for (int pos = 0; pos < rdfns.length(); pos++) {
            char ch = rdfns.charAt(pos);
            if (ch == '.')
                pattern.append('\\');
            pattern.append(ch);

        } // end for
        pattern.append(":RDF\\s*>");

        REProgram compiled = null;
        try { // compile the program and remember it for next time
            compiled = COMPILER.compile(pattern.toString());
            m_end_recognizers.put(rdfns, compiled);

        } // end try
        catch (RESyntaxException e) { // shouldn't happen
            if (logger.isDebugEnabled())
                logger.debug("End pattern compilation of \"" + pattern.toString() + "\" failed", e);

        } // end catch

        return compiled;

    } // end getEndRecognizer

    /**
     * Loads the HTTP content at the specified URL, scans it for RDF description blocks, and adds those blocks
     * as {@link com.silverwrist.venice.std.TrackbackItem TrackbackItem}s to our internal cache.  Uses modification
     * detection to keep from reloading a page unless necessary.
     *
     * @param url The URL of the resource to be loaded.
     * @param attrs The attributes of the specified page; if this is <code>null</code>, we'll check the page
     *              cache for the right attributes.
     * @return <code>true</code> if the page data was loaded and scanned for trackback items; <code>false</code>
     *         if no data was loaded (because it was not modified since the last time we loaded it, for instance).
     * @exception com.silverwrist.venice.except.TrackbackException If there was an error loading or interpreting
     *            the page data.
     */
    private synchronized boolean load(URL url, PageAttributes attrs) throws TrackbackException {
        if (attrs == null)
            attrs = (PageAttributes) (m_page_cache.get(url));

        // Create the GET method and set its headers.
        String s = url.toString();
        int x = s.lastIndexOf('#');
        if (x >= 0)
            s = s.substring(0, x);
        GetMethod getter = new GetMethod(s);
        HttpMethodParams params = getter.getParams();
        getter.setDoAuthentication(false);
        getter.setFollowRedirects(true);
        getter.setRequestHeader("User-Agent", USER_AGENT);
        getter.setRequestHeader("Accept", "text/*");
        getter.setRequestHeader("Accept-Encoding", "identity");
        params.setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
        getter.setParams(params);

        boolean get_resp = false;
        PageAttributes newattrs = null;
        ContentType ctype = null;
        byte[] rawdata = null;
        try { // set the Last-Modified date as an If-Modified-Since header on the request
            java.util.Date lmod = null;
            if (attrs != null)
                lmod = attrs.getLastModified();
            if (lmod != null)
                getter.setRequestHeader("If-Modified-Since", s_httpdate_format.format(lmod));

            // execute the Get method!
            int rc = m_http_client.executeMethod(getter);
            get_resp = true;
            if ((lmod != null) && (rc == HttpStatus.SC_NOT_MODIFIED))
                return false; // we were not modified
            if (rc == HttpStatus.SC_NO_CONTENT)
                return false; // there's no content there
            if (rc != HttpStatus.SC_OK) // this is farked!
                throw new TrackbackException("GET of " + url + " returned " + rc);

            // Get the new page attributes and save them off.
            newattrs = new PageAttributes(getter);
            m_page_cache.put(url, newattrs);

            // Get the Content-Type header and see if it's valid.
            Header hdr = getter.getResponseHeader("Content-Type");
            if (hdr != null)
                s = hdr.getValue();
            else
                s = "text/plain"; // necessary assumption
            ctype = new ContentType(s);
            if (!(ctype.getPrimaryType().equals("text")))
                throw new TrackbackException("URL " + url + " does not point to a text-based resource");

            // Load the resource in as byte data; we will determine the right character set for it later.
            rawdata = getter.getResponseBody();
            get_resp = false;

        } // end try
        catch (IOException e) { // IO error getting the page
            throw new TrackbackException("I/O error retrieving " + url + ": " + e.getMessage(), e);

        } // end catch
        catch (javax.mail.internet.ParseException e) { // translate into TrackbackException
            throw new TrackbackException("invalid Content-Type received for URL " + url, e);

        } // end catch
        finally { // release the connection if possible
            try { // need to get the message body
                if (get_resp)
                    getter.getResponseBody();

            } // end try
            catch (IOException e) { // ignore these
            } // end catch

            getter.releaseConnection();

        } // end finally

        // make a first guess at the charset from the HTTP header Content-Type
        String cset = ctype.getParameter("charset");
        if (cset == null)
            cset = "US-ASCII";
        String content = null;
        try { // interpret the content
            content = new String(rawdata, cset);

        } // end try
        catch (UnsupportedEncodingException e) { // fall back and try just using US-ASCII
            cset = null;
            try { // interpret the content
                content = new String(rawdata, "US-ASCII");

            } // end try
            catch (UnsupportedEncodingException e2) { // can't happen
                logger.debug("WTF? US-ASCII should damn well be a supported character set!", e2);

            } // end catch

        } // end catch

        // Look for <META HTTP-EQUIV=...> tags in the content.
        Map http_attrs = extractHttpEquivTags(content);

        // Try to get a Content-Type attribute from there.
        s = (String) (http_attrs.get("CONTENT-TYPE"));
        String cset2 = null;
        if (s != null) { // look for the content type
            try { // parse into Content-Type
                ContentType c = new ContentType(s);
                if (c.getPrimaryType().equals("text"))
                    cset2 = c.getParameter("charset");

            } // end try
            catch (javax.mail.internet.ParseException e) { // can't get a second Content-Type
                logger.debug("parse of Content-Type from META tags failed", e);
                cset2 = null;

            } // end catch

        } // end if

        if ((cset == null) && (cset2 == null))
            throw new TrackbackException("unable to determine character set for " + url);
        if ((cset2 != null) && ((cset == null) || !(cset.equalsIgnoreCase(cset2)))) { // reinterpret content in new character set
            try { // reinterpret content in new character set
                s = new String(rawdata, cset2);
                content = s;

                // the contents of the HTTP-EQUIV tags may have changed as a result
                http_attrs = extractHttpEquivTags(content);

            } // end try
            catch (UnsupportedEncodingException e) { // just use original character set
                if (cset == null)
                    throw new TrackbackException("unable to determine character set for " + url);

            } // end catch

        } // end if

        newattrs.updateFromPage(http_attrs); // update the page attributes from the META tag data

        // Search the page content for RDF blocks.
        RE m = new RE(s_rdf_start, RE.MATCH_NORMAL);
        int pos = 0;
        while (m.match(content, pos)) { // look for the end of this RDF block
            RE m2 = new RE(getEndRecognizer(m.getParen(1)), RE.MATCH_NORMAL);
            if (m2.match(content, m.getParenEnd(0))) { // we now have a block to feed to the XML parser
                try { // run the block through the XML parser
                    InputSource isrc = new InputSource(
                            new StringReader(content.substring(m.getParenStart(0), m2.getParenEnd(0))));
                    Document doc = m_rdf_parser.parse(isrc);

                    // examine topmost element, which should be rdf:RDF
                    Element root = doc.getDocumentElement();
                    if (NS_RDF.equals(root.getNamespaceURI()) && (root.getLocalName() != null)
                            && root.getLocalName().equals("RDF")) { // this is most definitely an rdf:RDF node...look for rdf:Description nodes under it
                        NodeList nl = root.getChildNodes();
                        for (int i = 0; i < nl.getLength(); i++) { // check each node in the list
                            Node n = nl.item(i);
                            if ((n.getNodeType() == Node.ELEMENT_NODE) && NS_RDF.equals(n.getNamespaceURI())
                                    && (n.getLocalName() != null) && n.getLocalName().equals("Description")) { // we've got an rdf:Description node...extract the attributes from it
                                Element elt = (Element) n;
                                try { // look for the item and trackback URLs
                                    URL item = null, trackback = null;
                                    s = elt.getAttributeNS(NS_DC, "identifier");
                                    if ((s != null) && (s.length() > 0))
                                        item = new URL(s);
                                    s = elt.getAttributeNS(NS_TRACKBACK, "ping");
                                    if ((s != null) && (s.length() > 0))
                                        trackback = new URL(s);
                                    if ((item != null) && (trackback != null)) { // create the item
                                        s = elt.getAttributeNS(NS_DC, "title");
                                        m_item_cache.put(item, new MyTrackbackItem(item, trackback, s, newattrs));

                                    } // end if

                                } // end try
                                catch (MalformedURLException e) { // this means skip this item
                                    logger.warn("URL parse failure", e);

                                } // end catch

                            } // end if

                        } // end for

                    } // end if

                } // end try
                catch (IOException e) { // disregard this block
                    logger.warn("RDF block parse failure", e);

                } // end catch
                catch (SAXException e) { // disregard this block
                    logger.warn("RDF block parse failure", e);

                } // end catch

            } // end if
              // else ignore this possible block

            pos = m.getParenEnd(0);

        } // end while

        return true;

    } // end load

    /**
     * Get an item from the items cache under one of two different URLs.
     *
     * @param url1 First URL to look under.
     * @param url2 Second URL to look under.
     * @return The item found in the cache, or <code>null</code> if not found.
     */
    private synchronized MyTrackbackItem getItem(URL url1, URL url2) {
        // Probe the cache under the first key; a hit there wins outright.
        MyTrackbackItem primary = (MyTrackbackItem) (m_item_cache.get(url1));
        if (primary != null)
            return primary;

        // Otherwise the answer is whatever is stored under the second key (possibly null).
        return (MyTrackbackItem) (m_item_cache.get(url2));

    } // end getItem

    /*--------------------------------------------------------------------------------
     * External operations
     *--------------------------------------------------------------------------------
     */

    /**
     * Given the URL of a trackback item, return the associated
     * {@link com.silverwrist.venice.std.TrackbackItem TrackbackItem} object, if it can be found.
     *
     * @param url The {@link java.net.URL URL} of the trackback item to look for.
     * @return The associated <code>TrackbackItem</code>, or <code>null</code> if it could not be found.
     * @exception com.silverwrist.venice.except.TrackbackException If there was an error looking for trackback items.
     */
    public TrackbackItem getItem(URL url) throws TrackbackException {
        // Work out the fragment-free form of the URL.  When there is no fragment, or the
        // chopped string will not parse as a URL, the original URL stands in for it.
        URL stripped = url;
        if (url.getRef() != null) { // there is a "#..." part to remove
            try { // cut the external form at the last hashmark
                String text = url.toString();
                stripped = new URL(text.substring(0, text.lastIndexOf('#')));

            } // end try
            catch (MalformedURLException e) { // keep using the URL as given
                // 'stripped' still holds 'url' here, so there is nothing to restore

            } // end catch

        } // end if

        // First cache probe, under both forms of the URL.
        MyTrackbackItem found = getItem(url, stripped);
        if ((found != null) && found.expire())
            found = getItem(url, stripped); // expire() returned true - probe the cache again

        if (found == null) { // nothing cached: load the page with no prior attributes, then probe again
            load(url, null);
            return getItem(url, stripped);

        } // end if

        // Something is cached: hand its page attributes to load(), and re-probe only if
        // load() returns true.
        if (load(url, found.getAttributes()))
            return getItem(url, stripped);
        return found;

    } // end getItem

    /**
     * Scans a specified resource and adds any trackback items found in the page to our cache.
     *
     * @param url The {@link java.net.URL URL} of the resource to be loaded.
     * @exception com.silverwrist.venice.except.TrackbackException If there was an error looking for trackback items.
     */
    public void addPage(URL url) throws TrackbackException {
        // Delegate to load() with null in place of previously cached page attributes;
        // the boolean result of load() is not needed here and is discarded.
        load(url, null);

    } // end addPage

    /**
     * Returns a {@link java.util.Collection Collection} of all
     * {@link com.silverwrist.venice.std.TrackbackItem TrackbackItem}s currently in the cache.
     *
     * @return See above.
     */
    public Collection getCachedItems() {
        ArrayList snapshot;
        synchronized (this) { // copy the cached values while holding this object's monitor
            snapshot = new ArrayList(m_item_cache.values());

        } // end synchronized block

        // Nothing cached: return the shared empty list.  Otherwise wrap the private copy
        // so that callers cannot modify it.
        return snapshot.isEmpty() ? Collections.EMPTY_LIST : Collections.unmodifiableList(snapshot);

    } // end getCachedItems

    /*--------------------------------------------------------------------------------
     * External static operations
     *--------------------------------------------------------------------------------
     */

    /**
     * Return the Singleton instance of <code>TrackbackManager</code>.
     *
     * @return See above.
     */
    public static synchronized TrackbackManager get() {
        // Fast exit when the instance already exists.
        if (s_self != null)
            return s_self;

        // First call: create the instance and remember it for later callers.
        s_self = new TrackbackManager();
        return s_self;

    } // end get

    /*--------------------------------------------------------------------------------
     * Static initializer
     *--------------------------------------------------------------------------------
     */

    static {
        // initialize date format for output
        DateFormat foo = new SimpleDateFormat(DateUtil.PATTERN_RFC1123);
        foo.setTimeZone(TimeZone.getTimeZone("GMT"));
        s_httpdate_format = foo;
        try { // compile all the regular expressions
            s_cache_smaxage = COMPILER.compile("s-maxage\\s*=\\s*(\\d+)");
            s_cache_maxage = COMPILER.compile("max-age\\s*=\\s*(\\d+)");
            s_meta_tag = COMPILER.compile("<meta\\s+[^>]*>");
            s_http_equiv_attr = COMPILER.compile("http-equiv\\s*=");
            s_content_attr = COMPILER.compile("content\\s*=");
            s_rdf_start = COMPILER.compile("<([A-Za-z_][A-Za-z0-9_.-]*):RDF\\s+");

        } // end try
        catch (RESyntaxException e) { // this is very bad
            logger.fatal("Regular expression compile failure", e);

        } // end catch

    } // end static initializer

} // end class TrackbackManager