no.sesat.search.http.servlet.BoomerangServlet.java Source code

Java tutorial

Introduction

Here is the source code for no.sesat.search.http.servlet.BoomerangServlet.java

Source

/*
 * Copyright (2006-2012) Schibsted ASA
 * This file is part of Possom.
 *
 *   Possom is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   Possom is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with Possom.  If not, see <http://www.gnu.org/licenses/>.
 */
package no.sesat.search.http.servlet;

import java.io.UnsupportedEncodingException;
import java.util.logging.Level;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.ServletException;
import java.io.IOException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import no.sesat.search.datamodel.DataModel;
import no.sesat.search.datamodel.generic.StringDataObject;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.log4j.Logger;

/**
 * Provides the user-statistics logging in Possom.
 * Links are logged with <b>ceremonial</b> boomerangs that come back (ie with a redirect response).
 * Javascript functionality (or user behaviour) is logged with <b>hunting</b> boomerangs that do not come back.
 *
 * A ceremonial example is:
 * http://sesam.no/boomerang/category=results;subcategory=main/http://wever.org
 *
 * A hunting example is:
 * http://sesam.no/hunting/?parameter-list
 *
 * Every boomerang is written to the <tt>no.sesat.Access</tt> logger as one
 * <tt>&lt;boomerang&gt;</tt> xml element holding a <tt>&lt;parameter/&gt;</tt> per logged entry.
 *
 * @version <tt>$Id: 3361 $</tt>
 *
 */
public final class BoomerangServlet extends HttpServlet {

    private static final Logger LOG = Logger.getLogger(BoomerangServlet.class);
    private static final Logger ACCESS = Logger.getLogger("no.sesat.Access");

    /** Path prefix identifying a ceremonial (redirecting) boomerang. */
    private static final String CEREMONIAL = "/boomerang/";

    /** User-Agent fragments treated as robots, which are given a permanent redirect. */
    private static final Pattern ROBOTS = Pattern.compile("(Googlebot|Slurp|Crawler|Bot)",
            Pattern.CASE_INSENSITIVE);

    @Override
    public void destroy() {
    }

    @Override
    public void init() {
    }

    /**
     * Logs the request as a boomerang and, for ceremonial boomerangs, redirects to the wrapped destination.
     *
     * @param req the incoming request
     * @param res the response; cache-busting headers are always set, a redirect only for ceremonial boomerangs
     * @throws ServletException declared by the overridden method
     * @throws IOException if sending the redirect fails
     */
    @Override
    protected void doGet(final HttpServletRequest req, final HttpServletResponse res)
            throws ServletException, IOException {

        // clients must not cache these requests
        res.setHeader("Cache-Control", "no-cache, must-revalidate, post-check=0, pre-check=0");
        res.setHeader("Pragma", "no-cache"); // for old browsers
        res.setDateHeader("Expires", 0); // to be double-safe

        // entrails is the map of logging information
        final Map<String, Object> entrails = new HashMap<String, Object>();

        // header names are case-insensitive; read once and reuse for the robot check
        final String userAgent = req.getHeader("User-Agent");

        // request attribute to keep
        entrails.put("referer", req.getHeader("Referer"));
        entrails.put("method", req.getMethod());
        entrails.put("ipaddress", req.getRemoteAddr());
        entrails.put("user-agent", userAgent);
        entrails.put("user-id", SearchServlet.getCookieValue(req, "SesamID"));
        entrails.put("user", SearchServlet.getCookieValue(req, "SesamUser"));

        if (req.getRequestURI().startsWith(CEREMONIAL)) {
            ceremonialBoomerang(req, res, entrails, userAgent);
        } else {
            huntingBoomerang(req, entrails);
        }
    }

    /**
     * Handles a ceremonial boomerang: parses the grub and destination out of the request url,
     * logs them, and redirects the client to the destination.
     *
     * The url is expected to look like <tt>.../boomerang/{key=value;key=value}/{destination}</tt>.
     * A url without the slash separating grub from destination is logged as an error and gets no redirect.
     */
    private void ceremonialBoomerang(
            final HttpServletRequest req,
            final HttpServletResponse res,
            final Map<String, Object> entrails,
            final String userAgent) throws IOException {

        final StringBuffer url = req.getRequestURL();
        if (null != req.getQueryString()) {
            url.append('?').append(req.getQueryString());
        }

        // pick out the entrails
        final int ceremonialAt = url.indexOf(CEREMONIAL);
        final int boomerangStart = ceremonialAt + CEREMONIAL.length();
        final int grubEnd = 0 <= ceremonialAt ? url.indexOf("/", boomerangStart) : -1;

        if (0 > grubEnd) {
            // SEARCH-4668
            LOG.error("Boomerang url not to standard --> " + url);
            return;
        }

        final String grub = url.substring(boomerangStart, grubEnd);
        LOG.debug(grub);

        // the url to return to: everything after the slash that terminates the grub.
        // Using the same slash for both keeps an empty grub from mangling the destination.
        // NOTE(review): the destination is taken verbatim from the request url.
        final String destination = url.substring(grubEnd + 1);

        // the grub details to add
        final StringTokenizer tokeniser = new StringTokenizer(grub, ";");
        while (tokeniser.hasMoreTokens()) {
            final String[] entry = tokeniser.nextToken().split("=");
            if (0 == entry.length) {
                // token consisted solely of '=' characters, nothing to log
                continue;
            }
            // a token without a value is logged with its key as the value
            entrails.put(entry[0], 1 < entry.length ? entry[1] : entry[0]);
        }
        entrails.put("boomerang", destination);
        kangerooGrub(entrails);

        LOG.debug("Ceremonial boomerang to " + destination);

        if (null != userAgent && ROBOTS.matcher(userAgent).find()) {
            // robots like permanent redirects. and we're not interested in their clicks so ok to cache.
            res.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
            res.setHeader("Location", destination);
            res.setHeader("Connection", "close");

        } else {
            // default behaviour for users (and for requests without a User-Agent header).
            res.sendRedirect(destination);
        }
    }

    /**
     * Handles a hunting boomerang: just grub, and the grub comes as clean parameters
     * read from the DataModel stored in the session.
     */
    private void huntingBoomerang(final HttpServletRequest req, final Map<String, Object> entrails) {

        final DataModel datamodel = (DataModel) req.getSession().getAttribute(DataModel.KEY);
        if (null != datamodel) {
            entrails.putAll(datamodel.getParameters().getValues());
        } else {
            // still log the request attributes rather than failing the request
            LOG.warn("Hunting boomerang without a datamodel in the session");
        }
        kangerooGrub(entrails);
    }

    /**
     * Writes the given parameters, sorted by key, to the access log as a single
     * <tt>&lt;boomerang&gt;</tt> element. Keys are url-decoded (UTF-8) and xml-escaped; values are xml-escaped.
     * A parameter whose key cannot be decoded is logged as an error and left out of the element.
     *
     * @param params the entries to log; values are expected to be StringDataObject or String,
     *               anything else is logged through its toString()
     */
    private void kangerooGrub(final Map<String, ?> params) {

        final List<String> paramKeys = new ArrayList<String>(params.keySet());

        Collections.sort(paramKeys);

        final StringBuilder bob = new StringBuilder("<boomerang>");

        for (String key : paramKeys) {
            final Object raw = params.get(key);
            try {

                // a null value ends up as the text "null", matching string concatenation semantics
                final String value = raw instanceof StringDataObject
                        ? ((StringDataObject) raw).getXmlEscaped()
                        : StringEscapeUtils.escapeXml(null == raw ? null : raw.toString());

                final String keyEscaped = StringEscapeUtils.escapeXml(URLDecoder.decode(key, "UTF-8"));

                bob.append("<parameter key=\"").append(keyEscaped)
                        .append("\" value=\"").append(value).append("\"/>");

            } catch (UnsupportedEncodingException ex) {
                LOG.error("Failed to kangerooGrub " + key, ex);
            } catch (IllegalArgumentException ex) {
                // URLDecoder rejects malformed percent-escapes; keys can come straight from the request url
                LOG.error("Failed to kangerooGrub " + key, ex);
            }
        }
        bob.append("</boomerang>");
        ACCESS.info(bob.toString());
    }
}