org.apache.nutch.searcher.response.SearchServlet.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nutch.searcher.response.SearchServlet.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.searcher.response;

import java.io.IOException;

import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
import org.apache.nutch.searcher.Hits;
import org.apache.nutch.searcher.NutchBean;
import org.apache.nutch.searcher.Query;
import org.apache.nutch.searcher.Summary;
import org.apache.nutch.util.NutchConfiguration;

/**
 * Servlet that returns search results in multiple different formats through
 * the ResponseWriter Nutch extension point.
 *
 * <p>Request parameters are read by name using the public constants declared
 * on this class ({@link #QUERY}, {@link #START}, {@link #ROWS}, ...). Values
 * that are absent fall back to defaults loaded from the Nutch configuration
 * in {@link #init(ServletConfig)}.</p>
 *
 * @see org.apache.nutch.searcher.response.ResponseWriter
 */
public class SearchServlet extends HttpServlet {

    public static final Log LOG = LogFactory.getLog(SearchServlet.class);
    private NutchBean bean;
    private Configuration conf;
    private ResponseWriters writers;

    // Fallback values; overwritten from configuration in init().
    private String defaultRespType = "xml";
    private String defaultLang = null;
    private int defaultNumRows = 10;
    private String defaultDedupField = "site";
    private int defaultNumDupes = 1;

    // Names of the request parameters understood by this servlet.
    public static final String RESPONSE_TYPE = "rt";
    public static final String QUERY = "query";
    public static final String LANG = "lang";
    public static final String START = "start";
    public static final String ROWS = "rows";
    public static final String SORT = "sort";
    public static final String REVERSE = "reverse";
    public static final String DEDUPE = "ddf";
    public static final String NUM_DUPES = "dupes";
    public static final String SUMMARY = "summary";
    public static final String FIELDS = "field";

    /**
     * Initializes servlet configuration default values.  Gets and caches the
     * NutchBean and the ResponseWriters.
     *
     * @param config the servlet configuration supplied by the container
     * @throws ServletException if an IOException is raised while obtaining
     *         the NutchBean or the ResponseWriters
     */
    public void init(ServletConfig config) throws ServletException {

        // set sensible defaults for response writer values and cache NutchBean.
        // Also get and cache all ResponseWriter implementations.
        super.init(config);
        try {
            this.conf = NutchConfiguration.get(config.getServletContext());
            this.defaultRespType = conf.get("search.response.default.type", "xml");
            this.defaultLang = conf.get("search.response.default.lang");
            this.defaultNumRows = conf.getInt("search.response.default.numrows", 10);
            this.defaultDedupField = conf.get("search.response.default.dedupfield", "site");
            this.defaultNumDupes = conf.getInt("search.response.default.numdupes", 1);
            bean = NutchBean.get(config.getServletContext(), this.conf);
            writers = new ResponseWriters(conf);
        } catch (IOException e) {
            throw new ServletException(e);
        }
    }

    /**
     * Forwards all POST requests to doGet.
     *
     * @param request the incoming HTTP request
     * @param response the HTTP response to write to
     * @throws ServletException propagated from doGet
     * @throws IOException propagated from doGet
     */
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }

    /**
     * Handles all search requests.  Gets parameter input.  Does the search and
     * gets Hits, details, and summaries.  Passes off to ResponseWriter classes
     * to write different output formats directly to HttpServletResponse.
     *
     * <p>A negative {@code start} is treated as 0 and a negative {@code rows}
     * falls back to the configured default, so that a malformed request cannot
     * produce a negative offset or a negative result-window size.</p>
     *
     * @param request the incoming HTTP request carrying the search parameters
     * @param response the HTTP response the selected ResponseWriter writes to
     * @throws ServletException declared for the servlet contract
     * @throws IOException if the response type is unknown, the query is blank,
     *         or a downstream call raises it
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        if (NutchBean.LOG.isInfoEnabled()) {
            NutchBean.LOG.info("Query request from " + request.getRemoteAddr());
        }

        // get the response type, used to call the correct ResponseWriter
        String respType = RequestUtils.getStringParameter(request, RESPONSE_TYPE, defaultRespType);
        ResponseWriter writer = writers.getResponseWriter(respType);
        if (writer == null) {
            throw new IOException("Unknown response type " + respType);
        }

        // get the query
        String query = RequestUtils.getStringParameter(request, QUERY);
        if (StringUtils.isBlank(query)) {
            throw new IOException("Query cannot be empty!");
        }

        // get the language from parameter, then request, then finally configuration
        String lang = RequestUtils.getStringParameter(request, LANG);
        if (StringUtils.isBlank(lang)) {
            lang = request.getLocale().getLanguage();
            if (StringUtils.isBlank(lang)) {
                lang = defaultLang;
            }
        }

        // get various other search parameters, fields allows only returning a
        // given set of fields
        boolean withSummary = RequestUtils.getBooleanParameter(request, SUMMARY, true);
        String sort = RequestUtils.getStringParameter(request, SORT);
        int start = RequestUtils.getIntegerParameter(request, START, 0);
        int rows = RequestUtils.getIntegerParameter(request, ROWS, defaultNumRows);
        boolean reverse = RequestUtils.getBooleanParameter(request, REVERSE, false);
        String dedup = RequestUtils.getStringParameter(request, DEDUPE, defaultDedupField);
        int numDupes = RequestUtils.getIntegerParameter(request, NUM_DUPES, defaultNumDupes);
        String[] fields = request.getParameterValues(FIELDS);

        // Paging values come straight from the client.  Without this a negative
        // start would be handed to hits.getHits() as the offset, and a negative
        // rows could turn the requested window size negative.
        if (start < 0) {
            start = 0;
        }
        if (rows < 0) {
            rows = Math.max(0, defaultNumRows);
        }
        // number of leading hits that must be fetched to cover [start, start + rows)
        int window = searchWindow(start, rows);

        // parse out the query
        Query queryObj = Query.parse(query, lang, this.conf);
        if (NutchBean.LOG.isInfoEnabled()) {
            NutchBean.LOG.info("query: " + query);
            NutchBean.LOG.info("lang: " + lang);
        }

        // search and return hits; a failed search is deliberately reported as
        // an empty result set rather than as an error response
        Hits hits;
        try {
            hits = bean.search(queryObj, window, numDupes, dedup, sort, reverse);
        } catch (IOException e) {
            if (NutchBean.LOG.isWarnEnabled()) {
                NutchBean.LOG.warn("Search Error", e);
            }
            hits = new Hits(0, new Hit[0]);
        }

        // get the total number of hits, the hits to show, and the hit details
        long totalHits = hits.getTotal();
        int end = (int) Math.min(hits.getLength(), window);
        // start may lie beyond the last available hit, in which case nothing is shown
        int numHits = (end > start) ? (end - start) : 0;
        Hit[] show = hits.getHits(start, numHits);
        HitDetails[] details = bean.getDetails(show);

        // setup the SearchResults object, used in response writing
        SearchResults results = new SearchResults();
        results.setResponseType(respType);
        results.setQuery(query);
        results.setLang(lang);
        results.setSort(sort);
        results.setReverse(reverse);
        results.setStart(start);
        results.setRows(rows);
        results.setEnd(end);
        results.setTotalHits(totalHits);
        results.setHits(show);
        results.setDetails(details);

        // are we returning summaries with results, if not avoid network hit
        if (withSummary) {
            Summary[] summaries = bean.getSummary(details, queryObj);
            results.setSummaries(summaries);
            results.setWithSummary(true);
        } else {
            results.setWithSummary(false);
        }

        // set return fields if any specified, if not all fields are returned
        if (fields != null && fields.length > 0) {
            results.setFields(fields);
        }

        // call the response writer to write out content to HttpResponse directly
        writer.writeResponse(results, request, response);
    }

    /**
     * Computes {@code start + rows} without int overflow, saturating at
     * {@link Integer#MAX_VALUE}.  Both arguments are expected to be
     * non-negative.
     */
    private static int searchWindow(int start, int rows) {
        long window = (long) start + (long) rows;
        return (window > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) window;
    }
}