com.mycompany.searchengineaggregator.SearchEngineAggregator.java Source code

Java tutorial

Introduction

Here is the source code for com.mycompany.searchengineaggregator.SearchEngineAggregator.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.mycompany.searchengineaggregator;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.UriInfo;

import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * REST web service that sends one query to Google, Yahoo and Bing, scrapes the
 * links out of each result page and returns them as a single JSON array in which
 * the results are interleaved by their rank within each engine.
 *
 * @author hanasuhail
 */
@Path("searchengineaggregator")
public class SearchEngineAggregator {

    private static final Logger LOGGER = Logger.getLogger(SearchEngineAggregator.class.getName());

    /** JSON key holding the 1-based rank of a result within its own search engine. */
    private static final String RESULT_NUMBER_KEY = "result number";

    /** Prefix that marks an outbound result link on a Google result page. */
    private static final String GOOGLE_REDIRECT_PREFIX = "/url?q=";

    /** Timeout passed to jsoup for each search request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 3000;

    @Context
    private UriInfo context;
    static final String GoogleUserAgent = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
    static final String YahooUserAgent = "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)";
    static final String BingUserAgent = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bot.htm)";

    /**
     * Search engines this service can query. {@code toString()} returns the
     * constant name ("Google", "Yahoo", "Bing"), which is the value stored under
     * the "search engine" key of every result.
     */
    public enum SearchEngine {
        Google, Yahoo, Bing
    }

    /**
     * Creates a new instance of SearchEngineAggregator
     */
    public SearchEngineAggregator() {
    }

    /**
     * Runs the query against Google, Yahoo and Bing (in that order) and returns
     * the combined results.
     *
     * NOTE(review): the payload is a JSON array although the method is declared
     * to produce text/html; the declared media type is left unchanged so existing
     * clients keep working.
     *
     * @param query search terms taken from the request path
     * @return a newline followed by a JSON array of result objects
     * @throws org.json.JSONException if a result object cannot be built
     */
    @GET
    @Path("{query}")
    @Produces(MediaType.TEXT_HTML)
    public String getHtml(@PathParam("query") String query) throws JSONException {
        ArrayList<JSONObject> googleResults = getSearchResults(query, SearchEngine.Google);
        ArrayList<JSONObject> yahooResults = getSearchResults(query, SearchEngine.Yahoo);
        ArrayList<JSONObject> bingResults = getSearchResults(query, SearchEngine.Bing);

        return compileResults(googleResults, yahooResults, bingResults);
    }

    /**
     * PUT method for updating or creating an instance of SearchEngineAggregator.
     * Currently a no-op.
     *
     * @param content representation for the resource
     */
    @PUT
    @Consumes(MediaType.TEXT_HTML)
    public void putHtml(String content) {
    }

    /**
     * Fetches the result page of the given search engine for the query and
     * converts every qualifying link on it into a JSON result object.
     *
     * @param query        search terms; URL-encoded before being sent
     * @param searchEngine engine to query
     * @return the results in page order, numbered from 1; an empty list (never
     *         {@code null}) when an argument is {@code null} or the page could
     *         not be fetched
     * @throws JSONException if a result object cannot be built
     */
    public ArrayList<JSONObject> getSearchResults(String query, SearchEngine searchEngine) throws JSONException {

        ArrayList<JSONObject> result = new ArrayList<>();
        if (query == null || searchEngine == null) {
            return result;
        }

        //Handles specific search engine connection details
        String baseUrl;
        String userAgent;
        switch (searchEngine) {
        case Google:
            baseUrl = "https://www.google.com/search?q=";
            userAgent = GoogleUserAgent;
            break;
        case Yahoo:
            baseUrl = "https://search.yahoo.com/search?q=";
            userAgent = YahooUserAgent;
            break;
        case Bing:
            baseUrl = "http://www.bing.com/search?q=";
            userAgent = BingUserAgent;
            break;
        default:
            return result;
        }

        Document doc;
        try {
            //Creates a connection, and fetches and parses the HTML file
            doc = Jsoup.connect(baseUrl + encodeQuery(query))
                    .userAgent(userAgent)
                    .timeout(TIMEOUT_MILLIS)
                    .get();
        } catch (IOException ex) {
            // A failing engine must not break the aggregate: log it and report no results.
            LOGGER.log(Level.SEVERE, "Search request to " + searchEngine + " failed", ex);
            return result;
        }

        //Get all links with attribute href
        Elements links = doc.select("a[href]");
        int rank = 1;

        for (Element link : links) {
            String text = link.text();
            if ("Cached".equals(text)) {
                // Links to the engine's cached copy are not results of their own.
                continue;
            }

            String target = extractTargetUrl(searchEngine, link.attr("href"));
            if (target != null) {
                result.add(toSearchResultJSONObject(searchEngine.toString(), rank, text, target));
                rank++;
            }
        }

        return result;
    }

    /**
     * Applies the engine-specific filter to a link found on a result page.
     *
     * @return the URL to report, or {@code null} if the link is not treated as a result
     */
    private static String extractTargetUrl(SearchEngine searchEngine, String href) {
        switch (searchEngine) {
        case Google:
            // Only links carrying the redirect prefix are kept; the prefix itself is dropped.
            return href.startsWith(GOOGLE_REDIRECT_PREFIX)
                    ? href.substring(GOOGLE_REDIRECT_PREFIX.length())
                    : null;
        case Yahoo:
        case Bing:
            // Yahoo and Bing deliberately share one rule: keep links starting with "http".
            return href.startsWith("http") ? href : null;
        default:
            return null;
        }
    }

    /** URL-encodes the query as UTF-8 so it can be appended to a search URL. */
    private static String encodeQuery(String query) {
        try {
            return URLEncoder.encode(query, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", ex);
        }
    }

    /**
     * Builds the JSON object describing one search result.
     *
     * @param searchEngine name of the engine that produced the result
     * @param resultNumber 1-based rank of the result within that engine
     * @param text         link text
     * @param url          link target
     * @return object with the keys "search engine", "result number", "text" and "url"
     * @throws JSONException if a value cannot be stored
     */
    public JSONObject toSearchResultJSONObject(String searchEngine, int resultNumber, String text, String url)
            throws JSONException {
        JSONObject entry = new JSONObject();
        entry.put("search engine", searchEngine);
        entry.put(RESULT_NUMBER_KEY, resultNumber);
        entry.put("text", text);
        entry.put("url", url);

        return entry;
    }

    /**
     * Merges the results of three search engines into one JSON array string.
     *
     * The merged list is ordered by "result number". The sort is stable, so
     * results with the same rank keep the order of the arguments, e.g. Google's
     * first result, Yahoo's first result, Bing's first result, and so on.
     * Entries without a numeric rank are placed last.
     *
     * @param result1 results of the first engine; {@code null} is treated as empty
     * @param result2 results of the second engine; {@code null} is treated as empty
     * @param result3 results of the third engine; {@code null} is treated as empty
     * @return a newline followed by the JSON array; "\n[]" when there are no results
     */
    public String compileResults(ArrayList<JSONObject> result1, ArrayList<JSONObject> result2,
            ArrayList<JSONObject> result3) {

        ArrayList<JSONObject> combinedResults = new ArrayList<>();
        addAllIfPresent(combinedResults, result1);
        addAllIfPresent(combinedResults, result2);
        addAllIfPresent(combinedResults, result3);

        Collections.sort(combinedResults, new Comparator<JSONObject>() {

            @Override
            public int compare(JSONObject obj1, JSONObject obj2) {
                // optInt never throws, so the ordering stays consistent for every pair.
                return Integer.compare(obj1.optInt(RESULT_NUMBER_KEY, Integer.MAX_VALUE),
                        obj2.optInt(RESULT_NUMBER_KEY, Integer.MAX_VALUE));
            }
        });

        StringBuilder finalResult = new StringBuilder("\n[");

        // The separator is written before every element except the first,
        // so an empty list still yields a well-formed "[]".
        String separator = "";
        for (JSONObject entry : combinedResults) {
            finalResult.append(separator).append(entry.toString());
            separator = ",";
        }

        return finalResult.append("]").toString();
    }

    /** Appends {@code source} to {@code target} unless {@code source} is {@code null}. */
    private static void addAllIfPresent(ArrayList<JSONObject> target, ArrayList<JSONObject> source) {
        if (source != null) {
            target.addAll(source);
        }
    }

}