net.yacy.cora.protocol.HeaderFramework.java Source code

Java tutorial

Introduction

Here is the source code for net.yacy.cora.protocol.HeaderFramework.java

Source

/**
 *  HeaderFramework
 *  Copyright 2004 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
 *  First released 2004 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.cora.protocol;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;

import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;

/**
 * this class implements a key-value mapping, as a hashtable
 * The difference to ordinary hashtable implementations is that the
 * keys are not compared by the equal() method, but are always
 * treated as string and compared as
 * key.uppercase().equal(.uppercase(comparator))
 */
public class HeaderFramework extends TreeMap<String, String> implements Map<String, String> {

    private static final long serialVersionUID = 18L;

    /* =============================================================
     * Constants defining http versions
     * ============================================================= */
    public static final String HTTP_VERSION_0_9 = "HTTP/0.9";
    public static final String HTTP_VERSION_1_0 = "HTTP/1.0";
    public static final String HTTP_VERSION_1_1 = "HTTP/1.1";

    /* =============================================================
     * Constants defining http header names
     * ============================================================= */

    public static final String HOST = "Host";
    public static final String USER_AGENT = "User-Agent";

    public static final String ACCEPT = "Accept";
    public static final String ACCEPT_LANGUAGE = "Accept-Language";
    public static final String ACCEPT_ENCODING = "Accept-Encoding";
    public static final String ACCEPT_CHARSET = "Accept-Charset";

    public static final String CONTENT_LENGTH = "Content-Length";
    public static final String CONTENT_TYPE = "Content-Type";
    public static final String CONTENT_MD5 = "Content-MD5";
    public static final String CONTENT_LOCATION = "Content-Location";
    public static final String CONTENT_ENCODING = "Content-Encoding";
    public static final String TRANSFER_ENCODING = "Transfer-Encoding";
    public static final String PRAGMA = "Pragma";
    public static final String CACHE_CONTROL = "Cache-Control";

    public static final String DATE = "Date"; // time message/response was created, https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.18
    public static final String LAST_MODIFIED = "Last-Modified";
    public static final String SERVER = "Server";

    public static final String ACCEPT_RANGES = "Accept-Ranges";
    public static final String CONTENT_RANGE = "Content-Range";
    public static final String RANGE = "Range";

    public static final String LOCATION = "Location";
    public static final String ETAG = "ETag";
    public static final String VIA = "Via";

    public static final String X_FORWARDED_FOR = "X-Forwarded-For";
    public static final String X_ROBOTS_TAG = "X-Robots-Tag"; // see http://googleblog.blogspot.com/2007/07/robots-exclusion-protocol-now-with-even.html
    public static final String X_ROBOTS = "X-Robots";

    public static final String X_YACY_INDEX_CONTROL = "X-YaCy-Index-Control";
    /** Added when generating legacy request header to allow template servlets to know the original request scheme : "http" or "https" */
    public static final String X_YACY_REQUEST_SCHEME = "X-YaCy-Request-Scheme"; // TODO: after completing implementation of HttpServletRequest getScheme() should be used and this can be removed

    public static final String SET_COOKIE = "Set-Cookie";
    public static final String SET_COOKIE2 = "Set-Cookie2";
    public static final String EXPIRES = "Expires";

    public static final String CORS_ALLOW_ORIGIN = "Access-Control-Allow-Origin"; // Cross-Origin Resource Sharing properties (http://www.w3.org/TR/cors/)

    public static final String RESPONSE_TIME_MILLIS = "ResponseTimeMillis";

    /* =============================================================
     * Constants for content-encodings
     * ============================================================= */
    public static final String CONTENT_ENCODING_GZIP = "gzip";

    /* =============================================================
     * Constants defining http methods
     * ============================================================= */
    public static final String METHOD_GET = "GET";
    public static final String METHOD_HEAD = "HEAD";
    public static final String METHOD_POST = "POST";
    public static final String METHOD_CONNECT = "CONNECT";

    /*
     * constanst for metadata which is stored in the ResponseHeader
     */
    public static final String STATUS_CODE = "STATUS_CODE";

    /* =============================================================
     * defining default http status messages
     * ============================================================= */
    public static final Map<String, String> http1_0 = new ConcurrentHashMap<String, String>();
    static {
        http1_0.put("200", "OK");
        http1_0.put("201", "Created");
        http1_0.put("202", "Accepted");
        http1_0.put("204", "No Content");
        http1_0.put("300", "Multiple Choices");
        http1_0.put("301", "Moved Permanently");
        http1_0.put("302", "Moved Temporarily");
        http1_0.put("304", "Not Modified");
        http1_0.put("400", "Bad Request");
        http1_0.put("401", "Unauthorized");
        http1_0.put("403", "Forbidden");
        http1_0.put("404", "Not Found");
        http1_0.put("500", "Internal Server Error");
        http1_0.put("501", "Not Implemented");
        http1_0.put("502", "Bad Gateway");
        http1_0.put("503", "Service Unavailable");
    }
    public static final Map<String, String> http1_1 = new ConcurrentHashMap<String, String>();
    static {
        http1_1.putAll(http1_0);
        http1_1.put("100", "Continue");
        http1_1.put("101", "Switching Protocols");
        http1_1.put("203", "Non-Authoritative Information");
        http1_1.put("205", "Reset Content");
        http1_1.put("206", "Partial Content");
        http1_1.put("300", "Multiple Choices");
        http1_1.put("303", "See Other");
        http1_1.put("305", "Use Proxy");
        http1_1.put("307", "Temporary Redirect");
        http1_1.put("402", "Payment Required");
        http1_1.put("405", "Method Not Allowed");
        http1_1.put("406", "Not Acceptable");
        http1_1.put("407", "Proxy Authentication Required");
        http1_1.put("408", "Request Time-out");
        http1_1.put("409", "Conflict");
        http1_1.put("410", "Gone");
        http1_1.put("411", "Length Required");
        http1_1.put("412", "Precondition Failed");
        http1_1.put("413", "Request Entity Too Large");
        http1_1.put("414", "Request-URI Too Large");
        http1_1.put("415", "Unsupported Media Type");
        http1_1.put("416", "Requested range not satisfiable");
        http1_1.put("417", "Expectation Failed");
        http1_1.put("504", "Gateway Time-out");
        http1_1.put("505", "HTTP Version not supported");
    }

    /* PROPERTIES: General properties */
    // Properties are used to internally store or remember header values and additional connection information
    // One of the usages is in proxy operation to prepare header values to be set as header values upon connection
    //  * use of properties as header values is discouraged (e.g. as proxy transmits it as arbitrary headers) [2016-8-21]
    public static final String CONNECTION_PROP_HTTP_VER = "HTTP";
    public static final String CONNECTION_PROP_PROTOCOL = "PROTOCOL";
    public static final String CONNECTION_PROP_HOST = "HOST";
    public static final String CONNECTION_PROP_USER = "USER";
    public static final String CONNECTION_PROP_METHOD = "METHOD";
    public static final String CONNECTION_PROP_PATH = "PATH";
    public static final String CONNECTION_PROP_EXT = "EXT";
    // public static final String CONNECTION_PROP_ARGS = "ARGS"; // use getQueryString() or getParameter()
    public static final String CONNECTION_PROP_CLIENTIP = "CLIENTIP";
    public static final String CONNECTION_PROP_PERSISTENT = "PERSISTENT";
    public static final String CONNECTION_PROP_REQUEST_START = "REQUEST_START";
    public static final String CONNECTION_PROP_REQUEST_END = "REQUEST_END";

    /* PROPERTIES: Client -> Proxy */
    public static final String CONNECTION_PROP_DIGESTURL = "URL"; // value DigestURL object
    public static final String CONNECTION_PROP_CLIENT_HTTPSERVLETREQUEST = "CLIENT_HTTPSERVLETREQUEST";

    /* PROPERTIES: Proxy -> Client */
    public static final String CONNECTION_PROP_PROXY_RESPOND_CODE = "PROXY_RESPOND_CODE";
    public static final String CONNECTION_PROP_PROXY_RESPOND_STATUS = "PROXY_RESPOND_STATUS";
    public static final String CONNECTION_PROP_PROXY_RESPOND_HEADER = "PROXY_RESPOND_HEADER";
    public static final String CONNECTION_PROP_PROXY_RESPOND_SIZE = "PROXY_REQUEST_SIZE";

    public HeaderFramework() {
        super(ASCII.insensitiveASCIIComparator);
    }

    public HeaderFramework(final Map<String, String> othermap) {
        // creates a case insensitive map from another map
        super(ASCII.insensitiveASCIIComparator);

        // load with data
        if (othermap != null)
            putAll(othermap);
    }

    /** Date formatter/parser for standard compliant HTTP header dates (RFC 1123) */
    private static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss Z"; // with numeric time zone indicator as defined in RFC5322
    private static final String PATTERN_RFC1036 = "EEEE, dd-MMM-yy HH:mm:ss zzz";
    private static final String PATTERN_ANSIC = "EEE MMM d HH:mm:ss yyyy";
    public static final SimpleDateFormat FORMAT_RFC1123 = new SimpleDateFormat(PATTERN_RFC1123, Locale.US);
    public static final SimpleDateFormat FORMAT_RFC1036 = new SimpleDateFormat(PATTERN_RFC1036, Locale.US);
    public static final SimpleDateFormat FORMAT_ANSIC = new SimpleDateFormat(PATTERN_ANSIC, Locale.US);
    private static final TimeZone TZ_GMT = TimeZone.getTimeZone("GMT");
    private static final Calendar CAL_GMT = Calendar.getInstance(TZ_GMT, Locale.US);

    /**
     * RFC 2616 requires that HTTP clients are able to parse all 3 different
     * formats. All times MUST be in GMT/UTC, but ...
     */
    private static final SimpleDateFormat[] FORMATS_HTTP = new SimpleDateFormat[] {
            // RFC 1123/822 (Standard) "Mon, 12 Nov 2007 10:11:12 GMT"
            FORMAT_RFC1123,
            // RFC 1036/850 (old)      "Monday, 12-Nov-07 10:11:12 GMT"
            FORMAT_RFC1036,
            // ANSI C asctime()        "Mon Nov 12 10:11:12 2007"
            FORMAT_ANSIC, };

    private static long lastRFC1123long = 0;
    private static String lastRFC1123string = "";

    public static final String formatRFC1123(final Date date) {
        if (date == null)
            return "";
        if (Math.abs(date.getTime() - lastRFC1123long) < 1000) {
            //System.out.println("date cache hit - " + lastRFC1123string);
            return lastRFC1123string;
        }
        synchronized (FORMAT_RFC1123) {
            final String s = FORMAT_RFC1123.format(date);
            lastRFC1123long = date.getTime();
            lastRFC1123string = s;
            return s;
        }
    }

    /** Initialization of static formats */
    static {
        // 2-digit dates are automatically parsed by SimpleDateFormat,
        // we need to detect the real year by adding 1900 or 2000 to
        // the year value starting with 1970
        CAL_GMT.setTimeInMillis(0);

        for (final SimpleDateFormat format : FORMATS_HTTP) {
            format.setTimeZone(TZ_GMT);
            format.set2DigitYearStart(CAL_GMT.getTime());
        }
    }

    /**
     * Parse a HTTP string representation of a date into a Date instance.
     * @param s The date String to parse.
     * @return The Date instance if successful, <code>null</code> otherwise.
     */
    public static Date parseHTTPDate(String s) {
        s = s.trim();
        if (s == null || s.length() < 9)
            return null;
        for (final SimpleDateFormat format : FORMATS_HTTP)
            synchronized (format) {
                try {
                    return format.parse(s);
                } catch (final ParseException e) {
                }
            }
        return null;
    }

    // to make the occurrence of multiple keys possible, we add them using a counter
    public String add(final String key, final String value) {
        final int c = keyCount(key);
        if (c == 0)
            return put(key, value);
        return put("*" + key + "-" + Integer.toString(c), value);
    }

    /**
     * Count occurence of header keys, look for original header name and a
     * numbered version of the header *headername-NUMBER , with NUMBER starting at 1
     * @param key the raw header name
     * @return number of headers with same name
     */
    public int keyCount(final String key) {
        if (!(containsKey(key)))
            return 0;
        int c = 1;
        final String h = "*" + key + "-";
        while (containsKey(h + Integer.toString(c)))
            c++;
        return c;
    }

    // a convenience method to access the map with fail-over defaults
    public String get(final String key, final String dflt) {
        final String result = get(key);
        if (result == null)
            return dflt;
        return result;
    }

    /**
     * Get one Header of headers with same name.
     * The headers are internally numbered
     * @param key the raw header name
     * @param count the number of the numbered header name (0 = same as get(key))
     * @return value of header with number=count
     */
    public String getSingle(final String key, final int count) {
        if (count == 0)
            return get(key); // first look for just the key
        return get("*" + key + "-" + count); // now for the numbered header names
    }

    /**
     * Get multiple header values with same header name.
     * The header names are internally numbered (format *key-1)
     * @param key the raw header name
     * @return header values
     */
    public String[] getMultiple(final String key) {
        final int count = keyCount(key);
        final String[] result = new String[count];
        for (int i = 0; i < count; i++)
            result[i] = getSingle(key, i);
        return result;
    }

    // convenience methods for storing and loading to a file system
    public void store(final File f) throws IOException {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(f);
            for (final java.util.Map.Entry<String, String> entry : entrySet()) {
                fos.write(UTF8.getBytes(entry.getKey() + "=" + entry.getValue() + "\r\n"));
            }
            fos.flush();
        } finally {
            if (fos != null)
                try {
                    fos.close();
                } catch (final Exception e) {
                }
        }
    }

    @Override
    public String toString() {
        return super.toString();
    }

    /*
     * example header
      Connection=close
      Content-Encoding=gzip
      Content-Length=7281
      Content-Type=text/html; charset=UTF-8
      Date=Mon, 05 Jan 2004 11:55:10 GMT
      Server=Apache/1.3.26
    */

    /**
     * Get mime type from header field content-type
     * stripps any parameter (denoted by ';' see RFC 2616)
     * @return mime or on missing header field "application/octet-stream"
     */
    public String mime() {
        final String tmpstr = this.get(CONTENT_TYPE, "application/octet-stream");
        final int pos = tmpstr.indexOf(';');
        if (pos > 0) {
            return tmpstr.substring(0, pos).trim();
        } else {
            return tmpstr;
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.apache.commons.fileupload.RequestContext#getCharacterEncoding()
     */
    public String getCharacterEncoding() {
        final String mimeType = getContentType();
        if (mimeType == null)
            return null;

        final String[] parts = CommonPattern.SEMICOLON.split(mimeType);
        if (parts == null || parts.length <= 1)
            return null;

        for (int i = 1; i < parts.length; i++) {
            final String param = parts[i].trim();
            if (param.startsWith("charset=")) {
                String charset = param.substring("charset=".length()).trim();
                if (charset.length() > 0 && (charset.charAt(0) == '\"' || charset.charAt(0) == '\''))
                    charset = charset.substring(1);
                if (charset.endsWith("\"") || charset.endsWith("'"))
                    charset = charset.substring(0, charset.length() - 1);
                return charset.trim();
            }
        }

        return null;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.commons.fileupload.RequestContext#getContentLength()
     */
    public int getContentLength() {
        if (containsKey(CONTENT_LENGTH)) {
            try {
                return (int) Long.parseLong(get(CONTENT_LENGTH));
            } catch (final NumberFormatException e) {
                ConcurrentLog.warn("HeaderFramework", "content-length cannot be parsed: " + get(CONTENT_LENGTH));
                return -1;
            }
        }
        return -1;
    }

    /*
     * provide method, which can handle big filelengths (for example from ftp)
     * because we can't change the interface in apache httpclient
     *
     * @see org.apache.commons.fileupload.RequestContext#getContentLength()
     */
    public long getContentLengthLong() {
        if (containsKey(CONTENT_LENGTH)) {
            try {
                return Long.parseLong(get(CONTENT_LENGTH));
            } catch (final NumberFormatException e) {
                return -1;
            }
        }
        return -1;
    }

    /**
     * Get header field content-type (unmodified)
     * which may include additional parameter (RFC 2616)
     * see also mime()
     * @see org.apache.commons.fileupload.RequestContext#getContentType()
     */
    public String getContentType() {
        return this.get(CONTENT_TYPE);
    }

    protected Date headerDate(final String kind) {
        if (containsKey(kind)) {
            Date parsedDate = parseHTTPDate(get(kind));
            if (parsedDate == null)
                return null;
            return parsedDate;
        }
        return null;
    }

    public StringBuilder toHeaderString(final String httpVersion, final int httpStatusCode,
            final String httpStatusText) {
        // creating a new buffer to store the header as string
        final StringBuilder theHeader = new StringBuilder(180);

        // generating the header string
        this.toHeaderString(httpVersion, httpStatusCode, httpStatusText, theHeader);

        // returning the result
        return theHeader;
    }

    public void toHeaderString(String httpVersion, final int httpStatusCode, String httpStatusText,
            final StringBuilder theHeader) {

        if (theHeader == null)
            throw new IllegalArgumentException();

        // setting the http version if it was not already set
        if (httpVersion == null)
            httpVersion = HTTP_VERSION_1_0;

        // setting the status text if it was not already set
        if ((httpStatusText == null) || (httpStatusText.length() == 0)) {
            // http1_1 contains all status code text
            if (HeaderFramework.http1_1.containsKey(Integer.toString(httpStatusCode)))
                httpStatusText = HeaderFramework.http1_1.get(Integer.toString(httpStatusCode));
            else
                httpStatusText = "Unknown";
        }

        // write status line
        theHeader.append(httpVersion).append(" ").append(Integer.toString(httpStatusCode)).append(" ")
                .append(httpStatusText).append("\r\n");

        // write header
        final Iterator<String> i = keySet().iterator();
        String key;
        char tag;
        int count;
        while (i.hasNext()) {
            key = i.next();
            tag = key.charAt(0);
            if ((tag != '*') && (tag != '#')) { // '#' in key is reserved for proxy attributes as artificial header values
                count = keyCount(key);
                for (int j = 0; j < count; j++) {
                    theHeader.append(key).append(": ").append(getSingle(key, j)).append("\r\n");
                }
            }
        }
        // end header
        theHeader.append("\r\n");
    }

    /**
     * Reading http headers from a reader class and building up a httpHeader object
     * @param reader the {@link BufferedReader} that is used to read the http header lines
     * @return a {@link HeaderFramework}-Object containing all parsed headers
     * @throws IOException
     */
    public void readHttpHeader(final BufferedReader reader) throws IOException {
        // reading all request headers
        int p;
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.isEmpty())
                break;
            if ((p = line.indexOf(':')) >= 0) {
                // store a property
                add(line.substring(0, p).trim(), line.substring(p + 1).trim());
            }
        }
    }
}