Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package com.mycompany.searchengineaggregator; import java.io.IOException; import javax.ws.rs.core.Context; import javax.ws.rs.core.UriInfo; import javax.ws.rs.Produces; import javax.ws.rs.Consumes; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.PathParam; import javax.ws.rs.PUT; import javax.ws.rs.core.MediaType; import java.util.ArrayList; import java.util.logging.Level; import java.util.logging.Logger; import org.json.JSONException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.json.JSONObject; import java.util.Collections; import java.util.Comparator; /** * REST Web Service * * @author hanasuhail */ @Path("searchengineaggregator") public class SearchEngineAggregator { @Context private UriInfo context; static final String GoogleUserAgent = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"; static final String YahooUserAgent = "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)"; static final String BingUserAgent = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bot.htm)"; public enum SearchEngine { Google { @Override public String toString() { return "Google"; } }, Yahoo { @Override public String toString() { return "Yahoo"; } }, Bing { @Override public String toString() { return "Bing"; } } } /** * Creates a new instance of SearchEngineAggregator */ public SearchEngineAggregator() { } /** * Retrieves representation of an instance of com.mycompany.searchengineaggregator.SearchEngineAggregator * @param query * @return an instance of java.lang.String * @throws org.json.JSONException */ @GET @Path("{query}") @Produces(MediaType.TEXT_HTML) public String getHtml(@PathParam("query") String query) throws JSONException { return compileResults(getSearchResults(query, SearchEngine.Google), getSearchResults(query, SearchEngine.Yahoo), getSearchResults(query, SearchEngine.Bing)); } /** * PUT method for updating or creating an instance of SearchEngineAggregator * @param content representation for the resource */ @PUT @Consumes(MediaType.TEXT_HTML) public void putHtml(String content) { } /* Returns HashSet of query results of type JSONObject from Google */ public ArrayList<JSONObject> getSearchResults(String query, SearchEngine searchEngine) throws JSONException { ArrayList<JSONObject> result = new ArrayList<>(); StringBuilder url = new StringBuilder(); Document doc = null; String userAgent = null; //Handles specific search engine connection details switch (searchEngine) { case Google: url.append("https://www.google.com/search?q="); url.append(query); userAgent = GoogleUserAgent; break; case Yahoo: url.append("https://search.yahoo.com/search?q="); url.append(query); userAgent = YahooUserAgent; break; case Bing: url.append("http://www.bing.com/search?q="); url.append(query); userAgent = BingUserAgent; break; } if (userAgent == null) return null; try { //Creates a connection, and fetches and parses the HTML file doc = Jsoup.connect(url.toString()).data("query", "Java").userAgent(userAgent).cookie("auth", "token") .timeout(3000).get(); } catch (IOException ex) { Logger.getLogger(SearchEngineAggregator.class.getName()).log(Level.SEVERE, null, ex); } if (doc == null) return null; //Get all links with attribute href Elements links = doc.select("a[href]"); int i = 1; //For every link, check if url was outbound, strip non-url substring and append to result for (Element link : links) { String tempLink = link.attr("href"); String tempText = link.text(); //Handles specific search engine result filters switch (searchEngine) { case Google: if (tempLink.startsWith("/url?q=") && !tempText.equals("Cached")) { tempLink = tempLink.replace("/url?q=", ""); result.add(toSearchResultJSONObject(searchEngine.toString(), i, tempText, tempLink)); i++; } break; case Yahoo: if (tempLink.startsWith("http") && !tempText.equals("Cached")) { result.add(toSearchResultJSONObject(searchEngine.toString(), i, tempText, tempLink)); i++; break; } case Bing: if (tempLink.startsWith("http") && !tempText.equals("Cached")) { result.add(toSearchResultJSONObject(searchEngine.toString(), i, tempText, tempLink)); i++; break; } } } return result; } /* Returns JSONObject with all passed data */ public JSONObject toSearchResultJSONObject(String searchEngine, int resultNumber, String text, String url) throws JSONException { JSONObject temp = new JSONObject(); temp.put("search engine", searchEngine); temp.put("result number", resultNumber); temp.put("text", text); temp.put("url", url); return temp; } /* Returns string of all SearchResult objects converted to JSON, from each search engine */ public String compileResults(ArrayList<JSONObject> result1, ArrayList<JSONObject> result2, ArrayList<JSONObject> result3) { StringBuilder finalResult = new StringBuilder(); ArrayList<JSONObject> combinedResults = new ArrayList<>(result1.size() + result2.size() + result3.size()); combinedResults.addAll(result1); combinedResults.addAll(result2); combinedResults.addAll(result3); //Sorts combined list so that all results appear in order of "hit" //E.g. Google's first result, Yahoo's first result, Bing's first result, etc. Collections.sort(combinedResults, new Comparator<JSONObject>() { @Override public int compare(JSONObject obj1, JSONObject obj2) { try { if (obj1.getInt("result number") < obj2.getInt("result number")) { return -1; } else if (obj1.getInt("result number") == obj2.getInt("result number")) { return 0; } else { return 1; } } catch (JSONException ex) { Logger.getLogger(SearchEngineAggregator.class.getName()).log(Level.SEVERE, null, ex); } return -1; } }); finalResult.append("\n["); //Appends all results from each search engine to the final result string for (JSONObject s : combinedResults) { finalResult.append(s.toString()); finalResult.append(","); } //Replace last comma with ']' finalResult.replace(finalResult.length() - 1, finalResult.length(), "]"); return finalResult.toString(); } }