Java tutorial
// Copyright (C) 2013 Markus Fischer // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; version 2 of the License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. // // Contact: info@doctor-doc.com package util; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.ListIterator; import org.jsoup.Jsoup; import org.jsoup.safety.Whitelist; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ch.dbs.form.SeeksForm; import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonParser; import enums.Connect; public class Seeks { private static final Logger LOG = LoggerFactory.getLogger(Seeks.class); /** * Performs a query to a Seeks server. The query must be natural text. * The query will get UTF-8 encoded an embedded in a search URL to a random * Seeks server. In case of an unresponsive Seeks server, it will retry to * query (other) Seeks server. **/ public List<SeeksForm> search(final String query) { // http://seeks-project.info/wiki/index.php/API-0.4.0 final List<SeeksForm> result = new ArrayList<SeeksForm>(); final List<SeeksForm> firstPriority = new ArrayList<SeeksForm>(); final List<SeeksForm> secondPriority = new ArrayList<SeeksForm>(); final List<SeeksForm> thirdPriority = new ArrayList<SeeksForm>(); // List of randomly shuffled numbers between a min and max. Contains no duplicates. final ListIterator<Integer> shuffledNumbers = getRandomNumber(0, ReadSystemConfigurations.getSeeksServer().length); // read in the first server from randomly shuffled list int nextServerNumber = shuffledNumbers.next(); try { final Http http = new Http(); String json = null; int maxRetry = 0; // repeat requests until we get a response or reach the max number of retries while ((json == null || "".equals(json)) && maxRetry < 2) { json = http.getContent(composeSearch(query, nextServerNumber), Connect.TIMEOUT_3.getValue(), Connect.TRIES_1.getValue(), "utf-8"); maxRetry++; // use next server if there are more, if not reuse existing server. if (shuffledNumbers.hasNext()) { nextServerNumber = shuffledNumbers.next(); } } // we still may get back a null/empty answer if (json != null && !"".equals(json)) { final JsonElement jsonRoot = new JsonParser().parse(json); final JsonArray jsonSnippets = jsonRoot.getAsJsonObject().getAsJsonArray("snippets"); for (final JsonElement jsonElement : jsonSnippets) { final SeeksForm record = new SeeksForm(); // we should always have an ID record.setId(jsonElement.getAsJsonObject().get("id").getAsString()); // we should always have a title record.setTitle(org.apache.commons.lang.StringEscapeUtils.unescapeHtml(Jsoup .clean(jsonElement.getAsJsonObject().get("title").getAsString(), Whitelist.none()))); // clean possible HTML entities // we should always have an URL record.setUrl(jsonElement.getAsJsonObject().get("url").getAsString()); // type may be null if (jsonElement.getAsJsonObject().get("type") != null) { record.setType(jsonElement.getAsJsonObject().get("type").getAsString()); } // summary may be null if (jsonElement.getAsJsonObject().get("summary") != null) { record.setSummary(jsonElement.getAsJsonObject().get("summary").getAsString()); } // improving existing relevance sorting if ("file".equals(record.getType()) && compareTitle(record, query)) { firstPriority.add(record); } else if (compareTitle(record, query)) { secondPriority.add(record); } else { thirdPriority.add(record); } } } } catch (final Exception e) { LOG.error(e.toString()); } finally { // create order: // type "file" and matching title result.addAll(firstPriority); // matching title result.addAll(secondPriority); // the rest result.addAll(thirdPriority); } return result; } /** * Compare method, used to improve the relevance sorting of the results, * returned by the Seeks server. **/ private boolean compareTitle(final SeeksForm record, final String query) { final String compare = prepareCompare(record.getTitle().substring(0, record.getTitle().length() / 2)); if (prepareCompare(query).startsWith(compare)) { return true; } return false; } /** * Prepares a String for comparison: converts * string to lower case, resolves to ss and removes * all non letter and non digit characters. */ private String prepareCompare(String input) { if (input == null) { return ""; } input = input.toLowerCase(); input = input.replaceAll("", "ss"); // remove all non-letter characters (including spaces) final StringBuffer strBuff = new StringBuffer(); char c; final int max = input.length(); for (int i = 0; i < max; i++) { c = input.charAt(i); if (Character.isLetterOrDigit(c)) { strBuff.append(c); } } return strBuff.toString(); } /** * Creates a UTF-8 encoded search URL to a random Seeks server, specified in SystemConfiguration **/ private String composeSearch(final String query, final int randomNumber) throws IllegalArgumentException { // final String search = "http://seeks.ru/search/txt/Refined+prediction+of+week+12+response+and+SVR+based+on+week+4+response+in+HCV+genotype+1+patients?output=json"; if (query == null || "".equals(query)) { throw new IllegalArgumentException("query must not be empty!"); } final CodeUrl coder = new CodeUrl(); // get Seeks server to query final StringBuffer buf = new StringBuffer(128); buf.append(ReadSystemConfigurations.getSeeksServer()[randomNumber]); if (buf.charAt(buf.length() - 1) != '/') { buf.append("/"); } buf.append("search/txt/"); buf.append(coder.encode(query, "utf-8")); buf.append("?output=json"); buf.append("&rpp=10"); // limit number of records return buf.toString(); } /** Returns a ListIterator of number between and including a minimum and maximum, randomly shuffled */ private ListIterator<Integer> getRandomNumber(final int min, final int max) { final ArrayList<Integer> numbers = new ArrayList<Integer>(max); for (int i = min; i < max; i++) { numbers.add(i); } Collections.shuffle(numbers); return numbers.listIterator(); } }