com.gargoylesoftware.htmlunit.Cache.java Source code

Java tutorial

Introduction

Here is the source code for com.gargoylesoftware.htmlunit.Cache.java

Source

/*
 * Copyright (c) 2002-2016 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gargoylesoftware.htmlunit;

import java.io.Serializable;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.client.utils.DateUtils;
import org.w3c.dom.css.CSSStyleSheet;

/**
 * <p>Simple cache implementation which caches compiled JavaScript files and parsed CSS snippets. Caching
 * compiled JavaScript files avoids unnecessary web requests and additional compilation overhead, while
 * caching parsed CSS snippets avoids very expensive CSS parsing.</p>
 *
 * <p>Both kinds of entries live in the same map: responses are keyed on the string form of their URL,
 * parsed style sheets are keyed on the CSS source text. When the number of entries exceeds the maximum
 * size, the entries with the oldest last access time are removed first.</p>
 *
 * @author Marc Guillemot
 * @author Daniel Gredler
 * @author Ahmed Ashour
 */
public class Cache implements Serializable {

    /** The maximum size of the cache. */
    private int maxSize_ = 40;

    /** Matches header values made only of an optionally signed integer (e.g. <tt>0</tt> or <tt>-1</tt>). */
    private static final Pattern DATE_HEADER_PATTERN = Pattern.compile("-?\\d+");

    /**
     * The map which holds the cached responses. Note that when keying on URLs, we key on the string version
     * of the URLs, rather than on the URLs themselves. This is done for performance, because a) the
     * {@link java.net.URL#hashCode()} method is synchronized, and b) the {@link java.net.URL#hashCode()}
     * method triggers DNS lookups of the URL hostnames' IPs. As of this writing, the HtmlUnit unit tests
     * run ~20% faster when keying on strings rather than on {@link java.net.URL} instances.
     */
    private final Map<String, Entry> entries_ = Collections.synchronizedMap(new HashMap<String, Entry>(maxSize_));

    /**
     * A cache entry. Entries are ordered (and compared for equality) by their last access time only,
     * which is what allows {@link Collections#min(java.util.Collection)} to find the eviction candidate.
     */
    private static class Entry implements Comparable<Entry>, Serializable {
        private final String key_;
        private WebResponse response_;
        private Object value_;
        private long lastAccess_;

        /**
         * Creates a new entry whose last access time is the current time.
         *
         * @param key the key under which the entry is stored in the cache
         * @param response the response the value originates from (may be {@code null})
         * @param value the cached object
         */
        Entry(final String key, final WebResponse response, final Object value) {
            key_ = key;
            response_ = response;
            value_ = value;
            lastAccess_ = System.currentTimeMillis();
        }

        /**
         * {@inheritDoc}
         * Entries accessed longer ago sort first.
         */
        @Override
        public int compareTo(final Entry other) {
            if (lastAccess_ < other.lastAccess_) {
                return -1;
            }
            if (lastAccess_ == other.lastAccess_) {
                return 0;
            }
            return 1;
        }

        /**
         * {@inheritDoc}
         * Two entries are equal when they have the same last access time (consistent with
         * {@link #compareTo(Entry)}); the key and the cached value are not taken into account.
         */
        @Override
        public boolean equals(final Object obj) {
            return obj instanceof Entry && lastAccess_ == ((Entry) obj).lastAccess_;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public int hashCode() {
            return ((Long) lastAccess_).hashCode();
        }

        /**
         * Updates the last access date.
         */
        public void touch() {
            lastAccess_ = System.currentTimeMillis();
        }
    }

    /**
     * Caches the specified object, if the corresponding request and response objects indicate
     * that it is cacheable. The entry is keyed on the URL of the request attached to the response.
     *
     * @param request the request corresponding to the specified compiled script
     * @param response the response corresponding to the specified compiled script
     * @param toCache the object that is to be cached, if possible (may be for instance a compiled script or
     * simply a WebResponse)
     * @return whether the response was cached or not
     */
    public boolean cacheIfPossible(final WebRequest request, final WebResponse response, final Object toCache) {
        if (!isCacheable(request, response)) {
            return false;
        }
        final String url = response.getWebRequest().getUrl().toString();
        final Entry entry = new Entry(url, response, toCache);
        entries_.put(entry.key_, entry);
        deleteOverflow();
        return true;
    }

    /**
     * Caches the parsed version of the specified CSS snippet. We key the cache based on CSS snippets (rather
     * than requests and responses as is done above) because a) this allows us to cache inline CSS, b) CSS is
     * extremely expensive to parse, so we want to avoid it as much as possible, c) CSS files aren't usually
     * nearly as large as JavaScript files, so memory bloat won't be too bad, and d) caching on requests and
     * responses requires checking dynamicity (see {@link #isCacheableContent(WebResponse)}), and headers often
     * aren't set up correctly, disallowing caching when in fact it should be allowed.
     *
     * @param css the CSS snippet from which <tt>styleSheet</tt> is derived
     * @param styleSheet the parsed version of <tt>css</tt>
     */
    public void cache(final String css, final CSSStyleSheet styleSheet) {
        final Entry entry = new Entry(css, null, styleSheet);
        entries_.put(entry.key_, entry);
        deleteOverflow();
    }

    /**
     * Truncates the cache to the maximal number of entries. The least recently accessed entries are
     * removed first, and the response of each removed entry (if any) is cleaned up.
     */
    protected void deleteOverflow() {
        // the size check, the search for the oldest entry and its removal must be one atomic step
        synchronized (entries_) {
            while (entries_.size() > maxSize_) {
                final Entry oldestEntry = Collections.min(entries_.values());
                entries_.remove(oldestEntry.key_);
                if (oldestEntry.response_ != null) {
                    oldestEntry.response_.cleanUp();
                }
            }
        }
    }

    /**
     * Determines if the specified response can be cached. Only responses to <tt>GET</tt> requests
     * with cacheable content qualify. Note that the HTTP method is read from the request attached
     * to the response; the <tt>request</tt> parameter is not consulted by this implementation.
     *
     * @param request the performed request
     * @param response the received response
     * @return {@code true} if the response can be cached
     */
    protected boolean isCacheable(final WebRequest request, final WebResponse response) {
        return HttpMethod.GET == response.getWebRequest().getHttpMethod() && isCacheableContent(response);
    }

    /**
     * <p>Tries to guess if the content is dynamic or not.</p>
     *
     * <p>"Since origin servers do not always provide explicit expiration times, HTTP caches typically
     * assign heuristic expiration times, employing algorithms that use other header values (such as the
     * <tt>Last-Modified</tt> time) to estimate a plausible expiration time".</p>
     *
     * <p>The current implementation considers as dynamic content everything except responses with a
     * <tt>Last-Modified</tt> header with a date older than 10 minutes or with an <tt>Expires</tt> header
     * specifying expiration in more than 10 minutes. When an <tt>Expires</tt> date is available it takes
     * precedence: <tt>Last-Modified</tt> is only considered if no <tt>Expires</tt> date was obtained.</p>
     *
     * @see <a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html">RFC 2616</a>
     * @param response the response to examine
     * @return {@code true} if the response should be considered as cacheable
     */
    protected boolean isCacheableContent(final WebResponse response) {
        final Date lastModified = parseDateHeader(response, "Last-Modified");
        final Date expires = parseDateHeader(response, "Expires");

        final long delay = 10 * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_MINUTE;
        final long now = getCurrentTimestamp();

        return expires != null && (expires.getTime() - now > delay)
                || (expires == null && lastModified != null && (now - lastModified.getTime() > delay));
    }

    /**
     * Gets the current time stamp. Provided as a method so that it can be overridden to simulate
     * another time.
     * @return the current time stamp
     */
    protected long getCurrentTimestamp() {
        return System.currentTimeMillis();
    }

    /**
     * Parses and returns the specified date header of the specified response. This method
     * returns {@code null} if the specified header cannot be found or cannot be parsed as a date.
     * A value consisting only of an optionally signed integer (for instance <tt>0</tt> or <tt>-1</tt>)
     * is not parsed as a date; the current date is returned instead, so that
     * {@link #isCacheableContent(WebResponse)} never treats such a response as cacheable.
     *
     * @param response the response
     * @param headerName the header name
     * @return the specified date header of the specified response
     */
    protected Date parseDateHeader(final WebResponse response, final String headerName) {
        final String value = response.getResponseHeaderValue(headerName);
        if (value == null) {
            return null;
        }
        final Matcher matcher = DATE_HEADER_PATTERN.matcher(value);
        if (matcher.matches()) {
            // purely numeric value: report "now", which is neither old enough nor far enough in the future
            return new Date();
        }
        return DateUtils.parseDate(value);
    }

    /**
     * Returns the cached response corresponding to the specified request. If there is
     * no corresponding cached object, this method returns {@code null}. Only <tt>GET</tt>
     * requests are looked up; a hit refreshes the last access time of the entry.
     *
     * @param request the request whose corresponding response is sought
     * @return the cached response corresponding to the specified request if any
     */
    public WebResponse getCachedResponse(final WebRequest request) {
        if (HttpMethod.GET != request.getHttpMethod()) {
            return null;
        }
        final Entry cachedEntry = getEntryAndTouch(request.getUrl().toString());
        if (cachedEntry == null) {
            return null;
        }
        return cachedEntry.response_;
    }

    /**
     * Returns the cached object corresponding to the specified request. If there is
     * no corresponding cached object, this method returns {@code null}. Only <tt>GET</tt>
     * requests are looked up; a hit refreshes the last access time of the entry.
     *
     * @param request the request whose corresponding cached compiled script is sought
     * @return the cached object corresponding to the specified request if any
     */
    public Object getCachedObject(final WebRequest request) {
        if (HttpMethod.GET != request.getHttpMethod()) {
            return null;
        }
        final Entry cachedEntry = getEntryAndTouch(request.getUrl().toString());
        if (cachedEntry == null) {
            return null;
        }
        return cachedEntry.value_;
    }

    /**
     * Returns the cached parsed version of the specified CSS snippet. If there is no
     * corresponding cached stylesheet, this method returns {@code null}. This is also the case
     * when the entry stored under the given text does not hold a style sheet (URL keyed entries
     * share the same map).
     *
     * @param css the CSS snippet whose cached stylesheet is sought
     * @return the cached stylesheet corresponding to the specified CSS snippet
     */
    public CSSStyleSheet getCachedStyleSheet(final String css) {
        final Entry cachedEntry = getEntryAndTouch(css);
        if (cachedEntry == null) {
            return null;
        }
        final Object value = cachedEntry.value_;
        if (value instanceof CSSStyleSheet) {
            return (CSSStyleSheet) value;
        }
        // the key collides with an entry of another kind: report a miss instead of failing on the cast
        return null;
    }

    /**
     * Looks up the entry stored under the specified key and, if found, refreshes its last access time.
     *
     * @param key the key of the entry (URL string or CSS text)
     * @return the entry, or {@code null} if nothing is cached under this key
     */
    private Entry getEntryAndTouch(final String key) {
        final Entry cachedEntry = entries_.get(key);
        if (cachedEntry != null) {
            // same lock as deleteOverflow(), which reads the access times to pick the oldest entry
            synchronized (entries_) {
                cachedEntry.touch();
            }
        }
        return cachedEntry;
    }

    /**
     * Returns the cache's maximum size. This is the maximum number of files that will
     * be cached. The default is <tt>40</tt>.
     *
     * @return the cache's maximum size
     */
    public int getMaxSize() {
        return maxSize_;
    }

    /**
     * Sets the cache's maximum size. This is the maximum number of files that will
     * be cached. The default is <tt>40</tt>. Entries exceeding the new maximum are
     * removed immediately.
     *
     * @param maxSize the cache's maximum size (must be &gt;= 0)
     * @throws IllegalArgumentException if <tt>maxSize</tt> is negative
     */
    public void setMaxSize(final int maxSize) {
        if (maxSize < 0) {
            throw new IllegalArgumentException("Illegal value for maxSize: " + maxSize);
        }
        maxSize_ = maxSize;
        deleteOverflow();
    }

    /**
     * Returns the number of entries in the cache.
     *
     * @return the number of entries in the cache
     */
    public int getSize() {
        return entries_.size();
    }

    /**
     * Clears the cache, cleaning up the response of every entry that has one.
     */
    public void clear() {
        // iterating over a synchronized map requires holding its lock manually
        synchronized (entries_) {
            for (final Entry entry : entries_.values()) {
                if (entry.response_ != null) {
                    entry.response_.cleanUp();
                }
            }
            entries_.clear();
        }
    }

}