Java tutorial
/************************************************************************** * Copyright (C) 2010 Atlas of Living Australia * All Rights Reserved. * * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. ***************************************************************************/ package au.org.ala.biocache.util; import au.org.ala.names.model.LinnaeanRankClassification; import au.org.ala.names.model.NameSearchResult; import au.org.ala.names.model.RankType; import au.org.ala.names.search.*; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.apache.solr.client.solrj.util.ClientUtils; import java.io.IOException; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * Some additional methods that should be moved to ALANameSearcher. * * Created by Adam Collins on 22/09/15. */ public class ALANameSearcherExt extends ALANameSearcher { public ALANameSearcherExt(String path) throws IOException { super(path); } private IndexSearcher getIdentifierIdxSearcher() throws IOException { return cbSearcher; } private IndexSearcher getVernIdxSearcher() throws IOException { return vernSearcher; } private void appendAutocompleteResults(Map<String, Map> output, TopDocs results, boolean includeSynonyms, boolean commonNameResults) throws IOException { ScoreDoc[] scoreDocs = results.scoreDocs; int scoreDocsCount = scoreDocs.length; for (int excludedResult = 0; excludedResult < scoreDocsCount; ++excludedResult) { ScoreDoc i = scoreDocs[excludedResult]; Document src = commonNameResults ? getVernIdxSearcher().doc(i.doc) : getIdentifierIdxSearcher().doc(i.doc); NameSearchResult nsr = commonNameResults ? searchForRecordByLsid(src.get("lsid")) : new NameSearchResult(src, null); if (nsr == null || (nsr.getLeft() == null && !includeSynonyms)) continue; Map m = formatAutocompleteNsr(i.score, nsr); //use the matched common name if (commonNameResults) { m.put("commonname", src.get("common_orig")); m.put("match", "commonName"); } else { m.put("match", "scientificName"); } while (includeSynonyms && nsr != null && m != null && nsr.getAcceptedLsid() != null) { if (output.containsKey(nsr.getAcceptedLsid())) { List list = (List) output.get(nsr.getAcceptedLsid()).get("synonymMatch"); if (list == null) list = new ArrayList(); list.add(m); output.get(nsr.getAcceptedLsid()).put("synonymMatch", list); m = null; nsr = null; } else { nsr = searchForRecordByLsid(nsr.getAcceptedLsid()); if (nsr != null) { List list = new ArrayList(); list.add(m); m = formatAutocompleteNsr(i.score, nsr); m.put("synonymMatch", list); } } } if (((nsr != null && nsr.getAcceptedLsid() == null) || includeSynonyms) && m != null) { if (m.get("name").toString().equals("Acacia")) { int aa = 4; } Map existing = output.get(m.get("lsid").toString()); if (existing == null) { output.put(m.get("lsid").toString(), m); } else { //use best score if ((Float) m.get("score") > (Float) existing.get("score")) { output.put(m.get("lsid").toString(), m); } } } } } private Query buildAutocompleteQuery(String field, String q, boolean allSearches) { //best match Query fq1 = new TermQuery(new Term(field, q)); //exact match fq1.setBoost(12f); //partial matches Query fq5 = new WildcardQuery(new Term(field, q + "*")); //begins with that begins with Query fq6 = new WildcardQuery(new Term(field, "* " + q + "*")); //contains word that begins with //any match Query fq7 = new WildcardQuery(new Term(field, "*" + q + "*")); //any match //join BooleanQuery o = new BooleanQuery(); o.add(fq1, BooleanClause.Occur.SHOULD); o.add(fq5, BooleanClause.Occur.SHOULD); o.add(fq6, BooleanClause.Occur.SHOULD); o.add(fq7, BooleanClause.Occur.SHOULD); return o; } private String getPreferredGuid(String taxonConceptGuid) throws Exception { Query qGuid = new TermQuery(new Term("guid", taxonConceptGuid)); Query qOtherGuid = new TermQuery(new Term("otherGuid", taxonConceptGuid)); BooleanQuery fullQuery = new BooleanQuery(true); fullQuery.add(qGuid, BooleanClause.Occur.SHOULD); fullQuery.add(qOtherGuid, BooleanClause.Occur.SHOULD); TopDocs topDocs = getIdentifierIdxSearcher().search(fullQuery, 1); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document doc = getIdentifierIdxSearcher().doc(scoreDoc.doc); return doc.get("guid"); } return taxonConceptGuid; } private boolean isKingdom(String name) { try { LinnaeanRankClassification lc = new LinnaeanRankClassification(name, null); NameSearchResult nsr = searchForRecord(lc, false); return nsr != null && nsr.getRank() == RankType.KINGDOM; } catch (Exception e) { return false; } } private String[] extractComponents(String in) { String[] retArray = new String[2]; int lastOpen = in.lastIndexOf("("); int lastClose = in.lastIndexOf(")"); if (lastOpen < lastClose) { //check to see if the last brackets are a kingdom String potentialKingdom = in.substring(lastOpen + 1, lastClose); if (isKingdom(potentialKingdom)) { retArray[0] = in.substring(0, lastOpen); retArray[1] = potentialKingdom; } else { retArray[0] = in; } } else { retArray[0] = in; //kingdom is null } return retArray; } private String getLsidByNameAndKingdom(String parameter) { String lsid = null; String name = null; String kingdom = null; String[] parts = extractComponents(parameter); name = parts[0]; name = name.replaceAll("_", " "); name = name.replaceAll("\\+", " "); kingdom = parts[1]; if (kingdom != null) { LinnaeanRankClassification cl = new LinnaeanRankClassification(kingdom, null); cl.setScientificName(name); try { lsid = searchForLSID(cl.getScientificName(), cl, null); } catch (ExcludedNameException e) { if (e.getNonExcludedName() != null) lsid = e.getNonExcludedName().getLsid(); else lsid = e.getExcludedName().getLsid(); } catch (ParentSynonymChildException e) { //the child is the one we want lsid = e.getChildResult().getLsid(); } catch (MisappliedException e) { if (e.getMisappliedResult() != null) lsid = e.getMatchedResult().getLsid(); } catch (SearchResultException e) { } } //check for a scientific name first - this will lookup in the name matching index. This will produce the correct result in a majority of scientific name cases. if (lsid == null || lsid.length() < 1) { try { lsid = searchForLSID(name, true, true); } catch (ExcludedNameException e) { if (e.getNonExcludedName() != null) lsid = e.getNonExcludedName().getLsid(); else lsid = e.getExcludedName().getLsid(); } catch (ParentSynonymChildException e) { //the child is the one we want lsid = e.getChildResult().getLsid(); } catch (MisappliedException e) { if (e.getMisappliedResult() != null) lsid = e.getMatchedResult().getLsid(); } catch (SearchResultException e) { } } if (lsid == null || lsid.length() < 1) { lsid = searchForLSIDCommonName(name); } if (lsid == null || lsid.length() < 1) { lsid = findLSIDByConcatName(name); } return lsid; } private String concatName(String name) { String patternA = "[^a-zA-Z]"; /* replace multiple whitespaces between words with single blank */ String patternB = "\\b\\s{2,}\\b"; String cleanQuery = ""; if (name != null) { cleanQuery = ClientUtils.escapeQueryChars(name);//.toLowerCase(); cleanQuery = cleanQuery.toLowerCase(); cleanQuery = cleanQuery.replaceAll(patternA, ""); cleanQuery = cleanQuery.replaceAll(patternB, ""); cleanQuery = cleanQuery.trim(); } return cleanQuery; } private String findLSIDByConcatName(String name) { try { String concatName = concatName(name); Query query = new TermQuery(new Term("concat_name", concatName)); TopDocs topDocs = getIdentifierIdxSearcher().search(query, 2); if (topDocs != null && topDocs.totalHits == 1) { for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document doc = getIdentifierIdxSearcher().doc(scoreDoc.doc); return doc.get("guid"); } } } catch (Exception e) { // do nothing } return null; } /** * from bie ws/guid/batch * * returned list of guid that is the same length as the input list * * @param taxaQueries a list of taxa queries * @return */ public List<String> getGuidsForTaxa(List<String> taxaQueries) { List guids = new ArrayList<String>(); for (int i = 0; i < taxaQueries.size(); i++) { String scientificName = taxaQueries.get(i); String lsid = getLsidByNameAndKingdom(scientificName); if (lsid != null && lsid.length() > 0) { String guid = null; try { guid = getExtendedTaxonConceptByGuid(lsid, true, true); } catch (Exception e) { } guids.add(guid); } if (guids.size() < i + 1) guids.add(null); } return guids; } private String getExtendedTaxonConceptByGuid(String guid, boolean checkPreferred, boolean checkSynonym) throws Exception { //Because a concept can be accepted and a synonym we need to check if the original guid exists before checking preferred NameSearchResult nsr = searchForRecordByLsid(guid); boolean hasAccepted = nsr != null && nsr.getAcceptedLsid() == null; if (checkPreferred && !hasAccepted) { guid = getPreferredGuid(guid); } if (checkSynonym && !hasAccepted) { if (nsr != null && nsr.isSynonym()) { guid = nsr.getAcceptedLsid(); } } return guid; } /** * Basic autocomplete. All matches are resolved to accepted LSID. * * @param q * @param max * @param includeSynonyms * @return */ public List<Map> autocomplete(String q, int max, boolean includeSynonyms) { try { if (false) { return null; } else { Map<String, Map> output = new HashMap<String, Map>(); //more queries for better scoring values String lq = q.toLowerCase(); String uq = q.toUpperCase(); //name search Query fq = buildAutocompleteQuery("name", lq, false); BooleanQuery b = new BooleanQuery(); b.add(fq, BooleanClause.Occur.MUST); b.add(new WildcardQuery(new Term("left", "*")), includeSynonyms ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST); TopDocs results = getIdentifierIdxSearcher().search(b, max); appendAutocompleteResults(output, results, includeSynonyms, false); //format search term for the current common name index uq = concatName(uq).toUpperCase(); //common name search fq = buildAutocompleteQuery("common", uq, true); results = getVernIdxSearcher().search(fq, max); appendAutocompleteResults(output, results, includeSynonyms, true); return new ArrayList(output.values()); } } catch (Exception e) { e.printStackTrace(); } return null; } private Map formatAutocompleteNsr(float score, NameSearchResult nsr) { Map m = new HashMap(); m.put("score", score); m.put("lsid", nsr.getLsid()); m.put("left", nsr.getLeft()); m.put("right", nsr.getRight()); m.put("rank", nsr.getRank()); m.put("rankId", nsr.getRank() != null ? nsr.getRank().getId() : 10000); m.put("cl", nsr.getRankClassification()); m.put("name", nsr.getRankClassification() != null ? nsr.getRankClassification().getScientificName() : null); m.put("acceptedLsid", nsr.getAcceptedLsid()); m.put("commonname", getCommonNameForLSID(nsr.getLsid())); m.put("commonnames", getCommonNamesForLSID(nsr.getLsid(), 1000)); return m; } }