au.edu.rmit.GalagoSearchClient.java Source code

Java tutorial

Introduction

Here is the source code for au.edu.rmit.GalagoSearchClient.java

Source

package au.edu.rmit;

import java.io.IOException;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.fluent.Request;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/** 
 * Copyright 2015 RMIT University.<br>
 *
 * Licensed under the terms of the MIT license. Please see the LICENSE file at
 * the root of this project for terms.
 * 
 * @author rueycheng
 * 
 */
public class GalagoSearchClient {
    /** Number of hits requested from the {@code /searchxml} endpoint by {@link #search(String)}. */
    private static final int DEFAULT_RESULT_COUNT = 20;

    /** Host name (or address) of the search server that requests are sent to. */
    protected String host;

    /** Port of the search server that requests are sent to. */
    protected int port;

    /** Simple holder for a result identifier; not used by this class itself. */
    public class Result {
        public String identifier;
    }

    /**
     * Creates a client that sends its requests to {@code http://host:port/}.
     *
     * @param host host name or address of the search server
     * @param port port of the search server
     */
    public GalagoSearchClient(String host, int port) {
        this.host = host;
        this.port = port;
    }

    /**
     * Evaluates an XPath expression against a document and returns all matches.
     *
     * @param doc  document to query
     * @param expr XPath expression
     * @return the matching nodes (possibly an empty list)
     * @throws XPathExpressionException if {@code expr} cannot be compiled or evaluated
     * @throws UnsupportedEncodingException declared for interface compatibility; not thrown here
     */
    protected NodeList xpathGetNodeList(Document doc, String expr)
            throws XPathExpressionException, UnsupportedEncodingException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        return (NodeList) xpath.compile(expr).evaluate(doc, XPathConstants.NODESET);
    }

    /**
     * Evaluates an XPath expression against a document and returns the first match.
     *
     * @param doc  document to query
     * @param expr XPath expression
     * @return the first matching node, or {@code null} when nothing matches
     * @throws XPathExpressionException if {@code expr} cannot be compiled or evaluated
     * @throws UnsupportedEncodingException declared for interface compatibility; not thrown here
     */
    protected Node xpathGetNode(Document doc, String expr)
            throws XPathExpressionException, UnsupportedEncodingException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        return (Node) xpath.compile(expr).evaluate(doc, XPathConstants.NODE);
    }

    /**
     * Builds the {@code /searchxml} request URI for a query.
     *
     * @param query query text; URL-encoded as UTF-8 into the {@code q} parameter
     * @param count value of the {@code n} parameter
     * @return the complete request URI
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     */
    protected String buildSearchUri(String query, int count) throws UnsupportedEncodingException {
        return baseUri("/searchxml") + "?q=" + URLEncoder.encode(query, "UTF-8") + "&n=" + count;
    }

    /**
     * Builds the {@code /snippet} request URI for a document identifier and terms.
     * Both the identifier and every term are URL-encoded, so characters such as
     * spaces, {@code &} or {@code =} cannot corrupt the query string.
     *
     * @param id    value of the {@code identifier} parameter
     * @param terms values sent as repeated {@code term} parameters, in order
     * @return the complete request URI
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     */
    protected String buildSnippetUri(String id, List<String> terms) throws UnsupportedEncodingException {
        StringBuilder uri = new StringBuilder(baseUri("/snippet"));
        uri.append("?identifier=").append(URLEncoder.encode(id, "UTF-8"));
        for (String term : terms) {
            uri.append("&term=").append(URLEncoder.encode(term, "UTF-8"));
        }
        return uri.toString();
    }

    /**
     * Requests {@code /searchxml} for the given query and parses the XML response.
     *
     * @param query query text
     * @param count value of the {@code n} request parameter
     * @return the parsed response, or an empty document if fetching or parsing failed
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     */
    protected Document runSearchXML(String query, int count)
            throws ParserConfigurationException, UnsupportedEncodingException {
        return fetchDocument(buildSearchUri(query, count));
    }

    /**
     * Requests {@code /snippet} for the given identifier and terms and parses the XML response.
     *
     * @param id    identifier sent in the {@code identifier} parameter
     * @param terms terms sent as repeated {@code term} parameters
     * @return the parsed response, or an empty document if fetching or parsing failed
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     */
    protected Document runSnippet(String id, List<String> terms)
            throws ParserConfigurationException, UnsupportedEncodingException {
        return fetchDocument(buildSnippetUri(id, terms));
    }

    /**
     * Serializes an element (including its own tags) to a string without an XML declaration.
     *
     * @param elem element to serialize
     * @return the element's XML text
     * @throws TransformerException if the element cannot be serialized
     */
    protected String getElementContent(Element elem) throws TransformerException {
        StringWriter writer = new StringWriter();
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.transform(new DOMSource(elem), new StreamResult(writer));
        return writer.toString();
    }

    /**
     * Runs a query and returns the snippet text of the first result.
     *
     * <p>The query is normalized first: runs of the characters {@code # ( ) : = . @ "}
     * become a single space, and the text is lower-cased and trimmed. The first
     * identifier in the search response is then used to request a snippet, whose
     * simple (attribute-less) tags are stripped before it is returned.
     *
     * @param query raw query text; may be {@code null}
     * @return the snippet text of the first result; the first result's identifier if the
     *         snippet response contains no {@code /response/snippet} element;
     *         {@code "[NO RESULT]"} if {@code query} is {@code null} or nothing was found;
     *         or {@code "error: "} followed by the exception message if processing failed
     */
    public String search(String query) {
        String result = "[NO RESULT]";

        if (query == null) {
            return result;
        }

        try {
            // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
            query = query.replaceAll("[#():=.@\"]+", " ").toLowerCase(java.util.Locale.ROOT).trim();
            Document response = runSearchXML(query, DEFAULT_RESULT_COUNT);

            NodeList identifiers = xpathGetNodeList(response, "/response/result/identifier/text()");
            if (identifiers.getLength() > 0) {
                String topIdentifier = identifiers.item(0).getNodeValue();
                // Fall back to the identifier when no snippet element comes back.
                result = topIdentifier;

                Document snippetResponse = runSnippet(topIdentifier, splitTerms(query));
                Node snippet = xpathGetNode(snippetResponse, "/response/snippet");
                if (snippet instanceof Element) {
                    result = getElementContent((Element) snippet).replaceAll("</?\\w+?>", "");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            result = "error: " + e.getMessage();
        }

        return result;
    }

    /** Returns {@code http://host:port} followed by the given path. */
    private String baseUri(String path) {
        return "http://" + host + ":" + port + path;
    }

    /**
     * Fetches and parses the XML document at {@code uri}. Any failure while fetching
     * or parsing is reported on stderr and answered with an empty document, so that
     * callers see "no matches" rather than an exception.
     */
    private Document fetchDocument(String uri) throws ParserConfigurationException {
        // NOTE(review): the parser is created with default settings; consider disabling
        // DTDs/external entities if the server cannot be fully trusted.
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        try {
            return builder.parse(uri);
        } catch (Exception e) {
            e.printStackTrace();
            return builder.newDocument();
        }
    }

    /** Splits a normalized query on whitespace, dropping empty tokens. */
    private static List<String> splitTerms(String query) {
        List<String> terms = new ArrayList<String>();
        for (String term : query.split("\\s+")) {
            if (!term.isEmpty()) {
                terms.add(term);
            }
        }
        return terms;
    }
}