// Java tutorial
/**
 Copyright (c) 2013-2014 Alexander Savochkin
 Chemical wikipedia search (chwise.net) web-site source code

 This file is part of ChWiSe.Net infrastructure.

 ChWiSe.Net infrastructure is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package net.chwise.websearch;

import net.chwise.documents.HighlightedFragmentsRetriever;
import net.chwise.index.ConfigurableDirectorySource;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.apache.lucene.queryparser.classic.ParseException;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.openscience.cdk.exception.InvalidSmilesException;

import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.lang.Math;

import static net.chwise.common.document.DocDefinitions.*;

/**
 * Servlet that answers search requests with a JSON document.
 *
 * <p>Request parameters read by {@link #doGet}:
 * <ul>
 *   <li>{@code q} - free-text query (optional);</li>
 *   <li>{@code sq} - zero or more structure queries; each value is escaped and
 *       added to the query as a {@code smiles:} clause;</li>
 *   <li>{@code from} - index of the first hit to return (default 0);</li>
 *   <li>{@code numShow} - number of hits to return (default 10, at most 20).</li>
 * </ul>
 *
 * <p>The response is a JSON object holding either {@code result} (array of documents)
 * and {@code total} (overall hit count), or {@code failure} when the query could not
 * be understood.
 */
public class SearchServlet extends HttpServlet {

    private final static Logger LOGGER = Logger.getLogger(SearchServlet.class.getName());

    /** Page size used when the request carries no usable {@code numShow}. */
    private static final int DEFAULT_PAGE_SIZE = 10;

    /** Upper bound on the number of hits returned by one request. */
    private static final int MAX_PAGE_SIZE = 20;

    /** Matches a single {@code \w} character; compiled once instead of on every call. */
    private static final Pattern WORD_CHARACTER = Pattern.compile("\\w");

    private ConfigurableDirectorySource directorySource = new ConfigurableDirectorySource();

    /**
     * Runs the search described by the request parameters and writes the JSON response.
     *
     * @param req  request carrying the {@code q}, {@code sq}, {@code from} and {@code numShow} parameters
     * @param resp response the JSON document is written to
     * @throws IOException if the response cannot be written
     * @throws RuntimeException if the search fails for a reason other than an unparsable
     *                          query or an invalid SMILES string
     */
    @Override
    public void doGet(HttpServletRequest req, HttpServletResponse resp) throws IOException {
        String joinedTextChemicalQuery = buildQueryString(req.getParameter("q"), req.getParameterValues("sq"));
        LOGGER.log(Level.INFO, "Query: {0}", joinedTextChemicalQuery);

        // Paging values come straight from the client: malformed numbers fall back to the
        // defaults and the values are clamped so they can never index outside the hit array.
        int from = Math.max(0, parseIntOrDefault(req.getParameter("from"), 0));
        int requestedNumShow = parseIntOrDefault(req.getParameter("numShow"), DEFAULT_PAGE_SIZE);
        int numShow = Math.min(Math.max(0, requestedNumShow), MAX_PAGE_SIZE);
        // Sum in long so that a huge "from" cannot overflow into a negative upper bound.
        int to = (int) Math.min((long) from + numShow, (long) Integer.MAX_VALUE);

        Object[] fromTo = { Integer.valueOf(from), Integer.valueOf(to) };
        LOGGER.log(Level.INFO, "Requested results range: from {0} to {1}", fromTo);

        JSONObject jsonResponse = new JSONObject();
        JSONArray jsonResult = new JSONArray();

        try {
            // Prepare for search
            String directorySourceClassName = getServletConfig().getInitParameter("directorySourceClassName");
            String directorySourceParams = getServletConfig().getInitParameter("directorySourceParams");
            Directory directory = directorySource.getDirectory(directorySourceClassName, directorySourceParams);
            // NOTE(review): the Directory is not closed here because this class cannot tell
            // whether ConfigurableDirectorySource shares it between requests - confirm.
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);

                // Perform query
                Analyzer analyzer = getAnalyzer();
                Query query = new MultiFieldQueryParser(Version.LUCENE_43, getTextFields(), analyzer, getFieldWeights())
                        .parse(joinedTextChemicalQuery);

                // The collector needs at least one slot, and asking for more slots than the
                // index has documents only wastes memory, so bound the size on both sides.
                int collectorSize = Math.max(1, Math.min(to, reader.maxDoc()));
                TopScoreDocCollector collector = TopScoreDocCollector.create(collectorSize, true); //TODO: use from, to
                searcher.search(query, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                int totalResults = collector.getTotalHits();

                LOGGER.log(Level.INFO, "Found {0} documents", hits.length);

                // Wrap results into json object
                HighlightedFragmentsRetriever highlighter = new HighlightedFragmentsRetriever();
                int lastExclusive = Math.min(to, hits.length);
                for (int i = from; i < lastExclusive; ++i) {
                    Document foundDoc = searcher.doc(hits[i].doc);
                    jsonResult.put(extractJSON(query, analyzer, highlighter, foundDoc));
                }
                jsonResponse.put("result", jsonResult);
                jsonResponse.put("total", totalResults);
            } finally {
                // The reader is opened per request, so it has to be released per request.
                closeQuietly(reader);
            }
        } catch (ParseException e) {
            putFailure(jsonResponse,
                    "We couldn't understand query",
                    "Use quotes for phrase search. Use AND,OR,NOT for boolean search");
        } catch (RuntimeException e) {
            if (e.getCause() instanceof InvalidSmilesException) {
                putFailure(jsonResponse,
                        "We couldn't understand query",
                        "Your structure formula doesn't seem like correct SMILES. Use structure editor for generating correct SMILES structures");
            } else {
                LOGGER.log(Level.SEVERE, "Unexpected runtime failure in SearchServlet", e);
                throw e;
            }
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Exception in servlet SearchServlet", e);
            throw new RuntimeException("Exception in servlet SearchServlet", e);
        }

        resp.setContentType("application/json");
        // Must be set before getWriter(); otherwise the container's default encoding is
        // used and non-ASCII characters in titles and text fragments are not sent as UTF-8.
        resp.setCharacterEncoding("UTF-8");
        PrintWriter out = resp.getWriter();
        out.print(jsonResponse);
        out.flush();
    }

    /**
     * Joins the free-text query and the structure queries into one query string.
     * Clauses are combined with {@code AND}; values that {@link #isQuery} rejects are skipped.
     *
     * @param queryText     free-text query, may be {@code null}
     * @param smilesQueries structure queries, may be {@code null}
     * @return the combined query string, empty when no usable clause was supplied
     */
    private String buildQueryString(String queryText, String[] smilesQueries) {
        StringBuilder joined = new StringBuilder();
        boolean nonEmptyQuery = isQuery(queryText);
        if (nonEmptyQuery) {
            joined.append(queryText);
        }
        if (smilesQueries != null) {
            for (String structSmiles : smilesQueries) {
                if (!isQuery(structSmiles)) {
                    continue;
                }
                if (nonEmptyQuery) {
                    joined.append(" AND ");
                }
                joined.append(" smiles:");
                joined.append(QueryParser.escape(structSmiles));
                nonEmptyQuery = true;
            }
        }
        return joined.toString();
    }

    /**
     * Parses a request parameter as an integer.
     *
     * @param value        raw parameter value, may be {@code null}
     * @param defaultValue value returned when the parameter is absent or not a valid integer
     * @return the parsed value or {@code defaultValue}
     */
    private static int parseIntOrDefault(String value, int defaultValue) {
        if (value == null) {
            return defaultValue;
        }
        try {
            return Integer.parseInt(value.trim());
        } catch (NumberFormatException e) {
            LOGGER.log(Level.WARNING, "Ignoring non-numeric paging parameter: {0}", value);
            return defaultValue;
        }
    }

    /**
     * Stores an "info" failure description under the {@code failure} key of the response.
     *
     * @param jsonResponse response object being built
     * @param message      short failure message
     * @param hint         advice shown to the user
     * @throws RuntimeException if the failure object cannot be stored
     */
    private static void putFailure(JSONObject jsonResponse, String message, String hint) {
        JSONObject jsonFailure = SearchFailureJSONResponse.create("info", message, hint);
        try {
            jsonResponse.put("failure", jsonFailure);
        } catch (JSONException e) {
            LOGGER.log(Level.SEVERE, "Could not attach failure description to the response", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the reader, logging (not propagating) a failure so that an exception thrown
     * by the search itself is not replaced by one thrown while cleaning up.
     */
    private static void closeQuietly(IndexReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to close index reader", e);
        }
    }

    /**
     * Converts a found document into the JSON object returned to the client.
     *
     * <p>The title, text, SMILES and MOL fields are read unconditionally; a document
     * lacking one of them causes a {@link NullPointerException}. External identifiers
     * (CAS, PubChem, ChemSpider, ChEBI) are optional and only emitted when present.
     *
     * @param query       parsed query, passed to the highlighter
     * @param analyzer    analyzer, passed to the highlighter
     * @param highlighter produces the highlighted text fragments
     * @param foundDoc    stored document to convert
     * @return JSON representation of the document
     * @throws IOException                  propagated from the highlighter
     * @throws InvalidTokenOffsetsException propagated from the highlighter
     * @throws JSONException                if a value cannot be stored in the JSON object
     */
    private JSONObject extractJSON(Query query, Analyzer analyzer, HighlightedFragmentsRetriever highlighter, Document foundDoc) throws IOException, InvalidTokenOffsetsException, JSONException {
        String title = foundDoc.getField(TITLE_FIELD_NAME).stringValue();
        String url = "#";
        String textFragment = foundDoc.getField(TEXT_FIELD_NAME).stringValue();
        String smiles = foundDoc.getField(STRUCTURE_SMILES_FIELD_NAME).stringValue();
        String mdlmol = foundDoc.getField(STRUCTURE_MOL_FIELD_NAME).stringValue();

        JSONArray jsonSynonymsArray = new JSONArray();
        for (IndexableField synonymField : foundDoc.getFields(SYNONYM_FIELD_NAME)) {
            jsonSynonymsArray.put(synonymField.stringValue());
        }

        // Highlight and fragment text
        String[] documentTextFragments = highlighter.getFragmentsWithHighlightedTerms(analyzer, query, TEXT_FIELD_NAME, textFragment, 3, 200);
        String textFragmentsJoined = StringUtils.join(documentTextFragments, " ... ");

        JSONObject jsonDoc = new JSONObject();
        jsonDoc.put("title", title);
        jsonDoc.put("textFragment", textFragmentsJoined);
        jsonDoc.put("url", url);
        jsonDoc.put("smiles", smiles);
        jsonDoc.put("mdlmol", mdlmol);
        jsonDoc.put("synonyms", jsonSynonymsArray);

        JSONObject externalIdsDictionary = new JSONObject();
        putIfPresent(externalIdsDictionary, "cas", optionalFieldValue(foundDoc, CAS_NO));
        putIfPresent(externalIdsDictionary, "pubchem", optionalFieldValue(foundDoc, PUBCHEM_ID));
        putIfPresent(externalIdsDictionary, "chemspider", optionalFieldValue(foundDoc, CHEMSPIDER));
        putIfPresent(externalIdsDictionary, "chebi", optionalFieldValue(foundDoc, CHEBI));
        jsonDoc.put("externalrefs", externalIdsDictionary);

        return jsonDoc;
    }

    /** Returns the string value of the named field, or {@code null} when the document has no such field. */
    private static String optionalFieldValue(Document doc, String fieldName) {
        IndexableField field = doc.getField(fieldName);
        return field == null ? null : field.stringValue();
    }

    /** Stores {@code value} under {@code key} unless the value is {@code null}. */
    private static void putIfPresent(JSONObject target, String key, String value) throws JSONException {
        if (value != null) {
            target.put(key, value);
        }
    }

    /**
     * Tells whether the text is usable as a query, i.e. contains at least one
     * {@code \w} character (ASCII letter, digit or underscore).
     *
     * <p>The check uses {@code find()} rather than a whole-string match, so text that
     * contains line breaks is judged by its content, and {@code null} is treated as
     * "not a query" instead of raising an exception.
     *
     * @param query text to check, may be {@code null}
     * @return {@code true} if the text contains a word character
     */
    boolean isQuery(String query) {
        // NOTE(review): \w is ASCII-only by default, so a query made solely of non-ASCII
        // letters is still treated as empty - confirm whether that is intended.
        return query != null && WORD_CHARACTER.matcher(query).find();
    }
}