au.org.ala.names.search.ALANameSearcher.java Source code

Introduction

Here is the source code for au.org.ala.names.search.ALANameSearcher.java
Source

/*
 * Copyright (C) 2014 Atlas of Living Australia
 * All Rights Reserved.
 *
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 */

package au.org.ala.names.search;

import au.org.ala.names.lucene.analyzer.LowerCaseKeywordAnalyzer;
import au.org.ala.names.model.*;
import au.org.ala.names.parser.PhraseNameParser;
import au.org.ala.names.util.TaxonNameSoundEx;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.gbif.ecat.model.ParsedName;
import org.gbif.ecat.parser.UnparsableException;
import org.gbif.ecat.voc.NameType;
import org.gbif.ecat.voc.Rank;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * The API used to perform a search on the ALA Name Matching Lucene Index.  It follows the following
 * algorithm when trying to find a match:
 * <p/>
 * 1. Search for a direct match for supplied name on the name field (with the optional rank provided).
 * <p/>
 * 2. Search for a match on the alternative name field (with optional rank)
 * <p/>
 * 3. Generate a searchable canonical name for the supplied name.  Search for a match on
 * the searchable canonical field using the generated name
 * <p/>
 *
 * When a match is found, the existence of homonyms is checked.  Where a homonym exists,
 * if the kingdom of the result does not match the supplied kingdom, a HomonymException is thrown.
 *
 * For more details about the algorithm please see
 * http://code.google.com/p/ala-portal/wiki/ALANames#Understanding_the_Name_Matching_Algorithm
 *
 *
 * @author Natasha
 */
public class ALANameSearcher {
    /** Logger used by this searcher; created per instance for the ALANameSearcher class. */
    protected Log log = LogFactory.getLog(ALANameSearcher.class);
    // Index readers kept as fields; cbReader is the one refreshed by reopenReaders().
    private DirectoryReader cbReader, irmngReader, vernReader;
    // Searchers over the "cb", "irmng", "vernacular" and "id" indexes (assigned in the String constructor).
    private IndexSearcher cbSearcher, irmngSearcher, vernSearcher, idSearcher;
    // Per-thread query parsers: queryParser defaults to the "genus" field, idParser to the "lsid" field.
    private ThreadLocal<QueryParser> queryParser;
    private ThreadLocal<QueryParser> idParser;
    protected TaxonNameSoundEx tnse;
    // Name parser used to work out rank and name type from a scientific name.
    private PhraseNameParser parser;
    // Regular expression alternation built from the keys of Rank.RANK_MARKER_MAP, followed by a literal full stop.
    private static final String RANK_MARKER_ALL = "( " + StringUtils.join(Rank.RANK_MARKER_MAP.keySet(), "| ")
            + ")\\.";
    private static final Pattern RANK_MARKER = Pattern.compile(RANK_MARKER_ALL);
    //public static final Set<String> stopWords = new java.util.HashSet<String>(java.util.Arrays.asList(new String[]{"virus", "ictv", "ICTV"}));
    // Matches the words " virus", " ictv" and " ICTV" (each with its leading space).
    public static final Pattern virusStopPattern = Pattern.compile(" virus| ictv| ICTV");
    // Matches a single space, comma, ampersand or full stop.
    public static final Pattern voucherRemovePattern = Pattern.compile(" |,|&|\\.");
    // Matches ASCII text that contains " aff" followed by one of # ! ? \ . or a space; used to flag affinity species.
    public static final Pattern affPattern = Pattern
            .compile("([\\x00-\\x7F\\s]*) aff[#!?\\\\. ]([\\x00-\\x7F\\s]*)");
    // Matches ASCII text that contains " cf" followed by one of # ! ? \ . or a space; used to flag confer species.
    public static final Pattern cfPattern = Pattern.compile("([\\x00-\\x7F\\s]*) cf[#!?\\\\. ]([\\x00-\\x7F\\s]*)");
    /**
     * A set of names that are cross rank homonyms.
     * Populated by the String constructor from the classpath resource
     * au/org/ala/homonyms/cross_rank_homonyms.txt.
     */
    private Set crossRankHomonyms;

    /**
     * Creates a searcher without opening any index.
     * <p/>
     * This constructor has an empty body: the readers, searchers, query parsers, name parser and
     * homonym set are only assigned by {@link #ALANameSearcher(String)}.
     */
    public ALANameSearcher() {
    }

    /**
     * Creates a new name searcher that reads its indexes from beneath the supplied directory.
     * <p/>
     * The sub directories "cb", "irmng", "vernacular" and "id" are opened in that order. A sub
     * directory that does not exist yet is created with an empty index before it is opened.
     *
     * @param indexDirectory The directory that contains the index files for the scientific names, irmng and vernacular names.
     * @throws CorruptIndexException
     * @throws IOException
     */
    public ALANameSearcher(String indexDirectory) throws CorruptIndexException, IOException {
        log.debug("Creating the search object for the name matching api...");

        // query parsers are handed out per thread so that they are never shared
        queryParser = new ThreadLocal<QueryParser>() {
            @Override
            protected QueryParser initialValue() {
                QueryParser nameParser = new QueryParser(Version.LUCENE_34, "genus",
                        new LowerCaseKeywordAnalyzer());
                // fuzzy match similarity setting, used to match the authorship
                nameParser.setFuzzyMinSim(0.8f);
                return nameParser;
            }
        };
        idParser = new ThreadLocal<QueryParser>() {
            @Override
            protected QueryParser initialValue() {
                return new QueryParser(Version.LUCENE_34, "lsid",
                        new org.apache.lucene.analysis.core.KeywordAnalyzer());
            }
        };

        final String basePath = indexDirectory + File.separator;

        // scientific name (CB) index
        File cbDir = createIfNotExist(basePath + "cb");
        cbReader = DirectoryReader.open(FSDirectory.open(cbDir));
        cbSearcher = new IndexSearcher(cbReader);

        // IRMNG index
        File irmngDir = createIfNotExist(basePath + "irmng");
        irmngReader = DirectoryReader.open(FSDirectory.open(irmngDir));
        irmngSearcher = new IndexSearcher(irmngReader);

        // common (vernacular) name index
        File vernacularDir = createIfNotExist(basePath + "vernacular");
        vernReader = DirectoryReader.open(FSDirectory.open(vernacularDir));
        vernSearcher = new IndexSearcher(vernReader);

        // identifier index; its reader is not kept in a field
        File idDir = createIfNotExist(basePath + "id");
        idSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(idDir)));

        tnse = new TaxonNameSoundEx();
        parser = new PhraseNameParser();

        // load the cross rank homonyms that are bundled on the classpath
        crossRankHomonyms = au.org.ala.names.util.FileUtils.streamToSet(
                getClass().getClassLoader().getResourceAsStream("au/org/ala/homonyms/cross_rank_homonyms.txt"),
                new HashSet<String>(), true);
    }

    /**
     * Returns the supplied index directory as a File, creating it first when it is missing.
     * <p/>
     * A newly created directory receives an empty, committed Lucene index so that a reader
     * can be opened on it straight away. An existing directory is returned untouched.
     *
     * @param indexDirectory path of the index directory
     * @return the File for the index directory
     * @throws IOException when the directory or the empty index can not be created
     */
    private File createIfNotExist(String indexDirectory) throws IOException {

        File idxFile = new File(indexDirectory);
        if (!idxFile.exists()) {
            FileUtils.forceMkdir(idxFile);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer);
            FSDirectory dir = FSDirectory.open(idxFile);
            try {
                IndexWriter iw = new IndexWriter(dir, conf);
                try {
                    iw.commit();
                } finally {
                    // always release the writer (and its write lock), even when the commit fails
                    iw.close();
                }
            } finally {
                // closing the writer does not close the directory it was given
                dir.close();
            }
        }
        return idxFile;
    }

    /**
     * Refreshes the reader and searcher of the "cb" index when the index has changed on disk.
     * <p/>
     * Only the cb reader is refreshed because the other indexes should NOT be changing.
     * This is a best-effort operation: a failure is logged and not propagated to the caller.
     */
    public void reopenReaders() {
        try {
            // openIfChanged returns null when the index has not changed
            DirectoryReader newReader = DirectoryReader.openIfChanged(cbReader);

            if (newReader != null) {
                DirectoryReader oldReader = cbReader;
                // install the replacement before releasing the old reader so that the
                // fields never refer to a closed reader
                cbReader = newReader;
                cbSearcher = new IndexSearcher(newReader);
                oldReader.close();
            }
        } catch (Exception e) {
            log.warn("Problem encountered while reopening the cb index reader", e);
        }

    }

    /**
     * Dumps a list of the species LSID's that are contained in the index.
     * <p/>
     * Every document of rank "species" that has no "synonym" field contributes one line to
     * /data/species.txt (UTF-8). The "lsid" value is written, or the "id" value when the
     * lsid is missing or blank. Failures are logged and not propagated.
     */
    public void dumpSpecies() {
        OutputStreamWriter fileOut = null;
        try {
            fileOut = new OutputStreamWriter(new FileOutputStream("/data/species.txt"), "UTF-8");
            Term term = new Term("rank", "species");
            TopDocs hits = cbSearcher.search(new TermQuery(term), 2000000);

            for (ScoreDoc sdoc : hits.scoreDocs) {
                Document doc = cbReader.document(sdoc.doc);

                if (doc.getField("synonym") == null) {
                    // Document.get returns null for an absent field, avoiding a NullPointerException
                    String lsid = StringUtils.trimToNull(doc.get("lsid"));
                    if (lsid == null) {
                        lsid = doc.get("id");
                    }
                    if (lsid != null) {
                        fileOut.write(lsid + "\n");
                    }
                }

            }
            fileOut.flush();

        } catch (Exception e) {
            log.error("Unable to dump the species LSIDs to /data/species.txt", e);
        } finally {
            if (fileOut != null) {
                try {
                    fileOut.close();
                } catch (IOException e) {
                    log.warn("Unable to close /data/species.txt", e);
                }
            }
        }

    }

    /**
     * Searches the index for the supplied name with or without fuzzy name matching.
     * <p/>
     * Delegates to {@link #searchForLSID(String, RankType, boolean)} with a null rank, so no
     * specific rank is requested.
     *
     * @param name  the scientific name to search for
     * @param fuzzy look for a fuzzy match
     * @return the LSID produced by the delegated search; null when nothing is returned by it
     * @throws SearchResultException propagated from the delegated search
     */
    public String searchForLSID(String name, boolean fuzzy) throws SearchResultException {
        return searchForLSID(name, null, fuzzy);
    }

    /**
     * Search for lsid with or without fuzzy matching. Ignoring or using homonyms.
     * <p/>
     * Delegates to {@link #searchForLSID(String, RankType, boolean, boolean)} with a null rank.
     *
     * @param name            the scientific name to search for
     * @param fuzzy           when true fuzzy matches are accepted
     * @param ignoreHomonyms  When true homonyms will be ignored if a single result is located.
     * @return the LSID produced by the delegated search; null when nothing is returned by it
     * @throws SearchResultException propagated from the delegated search
     */
    public String searchForLSID(String name, boolean fuzzy, boolean ignoreHomonyms) throws SearchResultException {
        return searchForLSID(name, null, fuzzy, ignoreHomonyms);
    }

    /**
     * Searches for the name without using fuzzy name matching.
     * <p/>
     * Equivalent to calling {@link #searchForLSID(java.lang.String, boolean)} with fuzzy set to false.
     *
     * @param name scientific name for a taxon
     * @return the LSID produced by the delegated search; null when nothing is returned by it
     * @throws SearchResultException propagated from the delegated search
     * @see #searchForLSID(java.lang.String, boolean)
     */
    public String searchForLSID(String name) throws SearchResultException {
        return searchForLSID(name, false);
    }

    /**
     * Searches the index for the supplied name of the specified rank with or without
     * fuzzy name matching.
     * <p/>
     * Delegates to {@link #searchForLSID(String, LinnaeanRankClassification, RankType, boolean, boolean)}
     * with a null classification and with ignoreHomonym set to false. That overload returns the
     * "accepted" LSID of the result when one is present, otherwise the LSID of the result.
     *
     * @param name  the scientific name to search for
     * @param rank  the rank to search for; may be null
     * @param fuzzy look for a fuzzy match
     * @return the LSID produced by the delegated search; null when there is no result
     * @throws SearchResultException propagated from the delegated search
     */
    public String searchForLSID(String name, RankType rank, boolean fuzzy) throws SearchResultException {
        return searchForLSID(name, null, rank, fuzzy, false);

    }

    /**
     * Searches for the supplied name of the specified rank with or without fuzzy name matching.
     * <p/>
     * Delegates to {@link #searchForLSID(String, LinnaeanRankClassification, RankType, boolean, boolean)}
     * with a null classification.
     *
     * @param name           the scientific name to search for
     * @param rank           the rank to search for; may be null
     * @param fuzzy          when true fuzzy matches are accepted
     * @param ignoreHomonyms passed through to the delegated search as its ignoreHomonym flag
     * @return the LSID produced by the delegated search; null when there is no result
     * @throws SearchResultException propagated from the delegated search
     */
    public String searchForLSID(String name, RankType rank, boolean fuzzy, boolean ignoreHomonyms)
            throws SearchResultException {
        return searchForLSID(name, null, rank, fuzzy, ignoreHomonyms);
    }

    /**
     * Searches for an LSID of the supplied name and rank without a fuzzy match.
     * <p/>
     * Delegates to {@link #searchForLSID(String, LinnaeanRankClassification, RankType, boolean, boolean)}
     * with a null classification and with both fuzzy and ignoreHomonym set to false.
     *
     * @param name the scientific name to search for
     * @param rank the rank to search for; may be null
     * @return the LSID produced by the delegated search; null when there is no result
     * @throws SearchResultException propagated from the delegated search
     * @see #searchForLSID(java.lang.String, au.org.ala.names.model.RankType, boolean)
     */
    public String searchForLSID(String name, RankType rank) throws SearchResultException {
        return searchForLSID(name, null, rank, false, false);
    }

    /**
     * Searches for the LSID of the supplied name and rank. Using the kingdom to
     * resolve homonym issues.
     * <p/>
     * A classification is built from the kingdom and scientificName arguments and the search is
     * delegated to {@link #searchForLSID(String, LinnaeanRankClassification, RankType, boolean, boolean)}
     * with both fuzzy and ignoreHomonym set to false.
     *
     * @param name           the scientific name to search for
     * @param kingdom        first argument of the two argument LinnaeanRankClassification constructor
     * @param scientificName second argument of the two argument LinnaeanRankClassification constructor
     *                       (NOTE(review): confirm which classification field that constructor populates)
     * @param rank           the rank to search for; may be null
     * @return the LSID produced by the delegated search; null when there is no result
     * @throws SearchResultException propagated from the delegated search
     * @deprecated Use {@link #searchForLSID(String, au.org.ala.names.model.LinnaeanRankClassification, au.org.ala.names.model.RankType)}  instead.
     * It is more extensible to supply a classification object than a list of higher classification
     */
    @Deprecated
    public String searchForLSID(String name, String kingdom, String scientificName, RankType rank)
            throws SearchResultException {
        LinnaeanRankClassification cl = new LinnaeanRankClassification(kingdom, scientificName);
        return searchForLSID(name, cl, rank, false, false);
    }

    /**
     * Search for an LSID based on the supplied name, classification and rank with or without fuzzy name matching.
     * <p/>
     * The record is located with searchForRecord. The "accepted" LSID of that record is preferred;
     * when it is absent the LSID of the record itself is used. When the record carries neither,
     * a warning is logged and null is returned.
     *
     * @param name          the scientific name to search for
     * @param cl            The high taxa that form the classification for the search item
     * @param rank          the rank to search for; may be null
     * @param fuzzy         look for a fuzzy match
     * @param ignoreHomonym passed through to searchForRecord
     * @return the accepted LSID, else the LSID, else null (also null when no record is found)
     * @throws SearchResultException propagated from searchForRecord
     */
    public String searchForLSID(String name, LinnaeanRankClassification cl, RankType rank, boolean fuzzy,
            boolean ignoreHomonym) throws SearchResultException {
        NameSearchResult match = searchForRecord(name, cl, rank, fuzzy, ignoreHomonym);
        if (match == null) {
            return null;
        }
        if (match.getAcceptedLsid() != null) {
            return match.getAcceptedLsid();
        }
        if (match.getLsid() != null) {
            return match.getLsid();
        }
        // a record was found but it has no usable identifier
        log.warn("LSID missing for [name=" + name + ", id=" + match.getId() + "]");
        return null;
    }

    /**
     * Search for an LSID with the supplied classification without a fuzzy match.
     * Supplying the classification in this way allows the API to try and ascertain the rank and
     * the correct scientific name to use.
     * <p/>
     * The LSID of the matched record itself is returned; the accepted LSID is not consulted here.
     *
     * @param cl                the classification to work with
     * @param recursiveMatching passed through to searchForRecord
     * @return the LSID of the matched record, or null when no record is returned
     * @throws SearchResultException propagated from searchForRecord
     */
    public String searchForLSID(LinnaeanRankClassification cl, boolean recursiveMatching)
            throws SearchResultException {
        NameSearchResult match = searchForRecord(cl, recursiveMatching);
        return match == null ? null : match.getLsid();
    }

    /**
     * Updates the supplied classification so that the supplied ID's are substituted with GUIDs.
     * <p/>
     * Each of the kingdom, phylum, class, order, family, genus and species identifiers that is
     * not null is replaced by the value returned from searchForLsidById. Identifiers that are
     * null are left alone. A null classification is ignored.
     *
     * @param cl the classification to update in place; may be null
     */
    public void updateClassificationWithGUID(LinnaeanRankClassification cl) {
        if (cl == null) {
            // nothing to update, e.g. a result that carries no classification
            return;
        }
        if (cl.getKid() != null) {
            cl.setKid(searchForLsidById(cl.getKid()));
        }
        if (cl.getPid() != null) {
            cl.setPid(searchForLsidById(cl.getPid()));
        }
        if (cl.getCid() != null) {
            cl.setCid(searchForLsidById(cl.getCid()));
        }
        if (cl.getOid() != null) {
            cl.setOid(searchForLsidById(cl.getOid()));
        }
        if (cl.getFid() != null) {
            cl.setFid(searchForLsidById(cl.getFid()));
        }
        if (cl.getGid() != null) {
            cl.setGid(searchForLsidById(cl.getGid()));
        }
        if (cl.getSid() != null) {
            cl.setSid(searchForLsidById(cl.getSid()));
        }
    }

    /**
     * Search for a result - optionally allowing for a recursive search.
     * <p/>
     * Delegates to {@link #searchForRecord(LinnaeanRankClassification, boolean, boolean, boolean)}
     * with addGuids and fuzzy both set to false.
     *
     * @param cl The classification to perform the match on
     * @param recursiveMatching When true attempt to match on higher classification
     * @return the result produced by the delegated search
     * @throws SearchResultException propagated from the delegated search
     */
    public NameSearchResult searchForRecord(LinnaeanRankClassification cl, boolean recursiveMatching)
            throws SearchResultException {
        return searchForRecord(cl, recursiveMatching, false, false);
    }

    /**
     * Searches for a result, returning the metrics of the match.  Optionally allowing for a recursive match.
     * <p/>
     * Delegates to {@link #searchForRecordMetrics(LinnaeanRankClassification, boolean, boolean, boolean)}
     * with addGuids and fuzzy both set to false.
     *
     * @param cl  The classification to perform the match on
     * @param recursiveMatching When true attempt to match on higher classification
     * @return The MetricResultDTO for the matched result.
     * @throws SearchResultException declared by this method; the overload it delegates to does not declare it
     */
    public MetricsResultDTO searchForRecordMetrics(LinnaeanRankClassification cl, boolean recursiveMatching)
            throws SearchResultException {
        return searchForRecordMetrics(cl, recursiveMatching, false, false);
    }

    /**
     * Searches for a result, returning the metrics of the match.  Optionally allowing for a recursive match and fuzzy matching.
     * <p/>
     * Delegates to {@link #searchForRecordMetrics(LinnaeanRankClassification, boolean, boolean, boolean)}
     * with addGuids set to false.
     *
     * @param cl The classification to perform the match on
     * @param recursiveMatching When true attempt to match on higher classification
     * @param fuzzy When true allow fuzzy matching on scientific names
     * @return The MetricResultDTO for the matched result.
     * @throws SearchResultException declared by this method; the overload it delegates to does not declare it
     */
    public MetricsResultDTO searchForRecordMetrics(LinnaeanRankClassification cl, boolean recursiveMatching,
            boolean fuzzy) throws SearchResultException {
        return searchForRecordMetrics(cl, recursiveMatching, false, fuzzy);
    }

    /**
     * Search for a result - optionally allowing for a recursive search and fuzzy matching.
     * <p/>
     * Delegates to {@link #searchForRecord(LinnaeanRankClassification, boolean, boolean, boolean)}
     * with addGuids set to false.
     *
     * @param cl The classification to perform the match on
     * @param recursiveMatching When true attempt to match on higher classification
     * @param fuzzy When true allow fuzzy matching on scientific names
     * @return the result produced by the delegated search
     * @throws SearchResultException propagated from the delegated search
     */
    public NameSearchResult searchForRecord(LinnaeanRankClassification cl, boolean recursiveMatching, boolean fuzzy)
            throws SearchResultException {
        return searchForRecord(cl, recursiveMatching, false, fuzzy);
    }

    /**
     * Search for a record with the supplied classification.
     * Supplying the classification in this way allows the API to try and ascertain the rank and
     * the correct scientific name to use.
     * <p/>
     * The search is run through searchForRecordMetrics. When the returned metrics carry a
     * "last exception" it is rethrown, otherwise the result held by the metrics is returned.
     *
     * @param cl                the classification to work with
     * @param recursiveMatching whether to try matching to a higher taxon when leaf taxa matching fails
     * @param addGuids          passed through to searchForRecordMetrics
     * @param fuzzy             when true allow fuzzy matching on scientific names
     * @return the result held by the metrics; may be null
     * @throws SearchResultException the last exception recorded in the metrics, when there is one
     */
    public NameSearchResult searchForRecord(LinnaeanRankClassification cl, boolean recursiveMatching,
            boolean addGuids, boolean fuzzy) throws SearchResultException {
        MetricsResultDTO metrics = searchForRecordMetrics(cl, recursiveMatching, addGuids, fuzzy);
        SearchResultException failure = metrics.getLastException();
        if (failure == null) {
            return metrics.getResult();
        }
        throw failure;
    }

    /**
     * Search for a specific name returning extra metrics that can be reported as name match quality.
     * <p/>
     * Delegates to {@link #searchForRecordMetrics(LinnaeanRankClassification, boolean, boolean, boolean, boolean)}
     * with ignoreHomonym set to false.
     *
     * @param cl The classification to perform the match on
     * @param recursiveMatching When true attempt to match on higher classification
     * @param addGuids  When true will look up the guids for the higher classification (deprecated param as these are now stored with the index)
     * @param fuzzy When true allow fuzzy matching on scientific names
     * @return The MetricResultDTO produced by the delegated search.
     */
    public MetricsResultDTO searchForRecordMetrics(LinnaeanRankClassification cl, boolean recursiveMatching,
            boolean addGuids, boolean fuzzy) {
        return searchForRecordMetrics(cl, recursiveMatching, addGuids, fuzzy, false);
    }

    /**
     * Searches for a result, returning the metrics of the match.  Optionally allowing for a recursive match and fuzzy matching and ignoring homonyms.
     * <p/>
     * The method works in these steps:
     * <ol>
     * <li>When the classification has no scientific name, a name and rank are derived from the
     * most specific populated part of the classification (infraspecific epithet down to kingdom).
     * When it has a scientific name and no rank, the rank is guessed by comparing the name with
     * the classification values and then by parsing the name.</li>
     * <li>The name (with question marks removed) is searched for.</li>
     * <li>When nothing is found and recursiveMatching is true, the authorship is temporarily
     * removed and the genus-or-above part of the name, then the species, genus, family, order,
     * class, phylum and kingdom of the classification are tried in turn. A result found this way
     * has its match type set to RECURSIVE.</li>
     * <li>The name type and the question mark / cf / aff issues of the original name are recorded.</li>
     * </ol>
     * When no name can be derived from the classification, the direct search is skipped and the
     * metrics are returned without a result (unless a recursive match succeeds).
     * Note that the supplied classification can be modified (genus and species may be populated).
     *
     * @param cl The classification to perform the match on
     * @param recursiveMatching When true attempt to match on higher classification
     * @param addGuids When true will look up the guids for the higher classification (deprecated param as these are now stored with the index)
     * @param fuzzy When true allow fuzzy matching on scientific names
     * @param ignoreHomonym When true ignore the homonym exception if a single result is returned.
     * @return the metrics for the search; its result is null when nothing matched
     */
    public MetricsResultDTO searchForRecordMetrics(LinnaeanRankClassification cl, boolean recursiveMatching,
            boolean addGuids, boolean fuzzy, boolean ignoreHomonym) {

        //set up the Object to return
        MetricsResultDTO metrics = new MetricsResultDTO();

        RankType rank = cl.getRank() != null ? RankType.getForStrRank(cl.getRank()) : null;
        String name = cl.getScientificName();
        String originalName = name;

        NameSearchResult nsr = null;
        metrics.setErrors(new HashSet<ErrorType>());

        if (name == null) {
            //ascertain the rank and construct the scientific name
            // NOTE(review): the marker tests on the infraspecific and specific epithets below inspect
            // getSubspecies()/getSpecies() rather than the epithet itself - confirm this is intended
            if (StringUtils.isNotEmpty(cl.getInfraspecificEpithet())
                    && !isInfraSpecificMarker(cl.getSubspecies())) {
                rank = RankType.SUBSPECIES;
                //construct the full scientific name from the parts
                if (StringUtils.isNotEmpty(cl.getGenus()) && StringUtils.isNotEmpty(cl.getSpecificEpithet())) {
                    name = cl.getGenus() + " " + cl.getSpecificEpithet() + " " + cl.getInfraspecificEpithet();
                }
            } else if (StringUtils.isNotEmpty(cl.getSubspecies()) && !isInfraSpecificMarker(cl.getSubspecies())) {
                rank = RankType.SUBSPECIES;
                name = cl.getSubspecies();
            } else if (StringUtils.isNotEmpty(cl.getSpecificEpithet()) && !isSpecificMarker(cl.getSpecies())) {
                rank = RankType.SPECIES;
                //construct the full scientific name from the parts
                if (StringUtils.isNotEmpty(cl.getGenus())) {
                    name = cl.getGenus() + " " + cl.getSpecificEpithet();
                }
            } else if (StringUtils.isNotEmpty(cl.getSpecies()) && !isSpecificMarker(cl.getSpecies())) {
                rank = RankType.SPECIES;
                //construct the full scientific name from the parts
                name = cl.getSpecies();
                //check to see of the name is a binomial
                if (!name.trim().contains(" ")) {
                    //construct the binomial
                    if (StringUtils.isNotEmpty(cl.getGenus())) {
                        // the specific epithet is always empty in this branch (the previous branch
                        // would have been taken otherwise), so the single word held in the
                        // species value is used as the epithet
                        name = cl.getGenus() + " " + name.trim();
                    } else {
                        name = null;
                    }
                }
            } else if (StringUtils.isNotEmpty(cl.getGenus())) {
                rank = RankType.GENUS;
                //construct the full scientific name from the parts
                name = cl.getGenus();
            } else if (StringUtils.isNotEmpty(cl.getFamily())) {
                rank = RankType.FAMILY;
                //construct the full scientific name from the parts
                name = cl.getFamily();
            } else if (StringUtils.isNotEmpty(cl.getOrder())) {
                rank = RankType.ORDER;
                //construct the full scientific name from the parts
                name = cl.getOrder();
            } else if (StringUtils.isNotEmpty(cl.getKlass())) {
                rank = RankType.CLASS;
                //construct the full scientific name from the parts
                name = cl.getKlass();
            } else if (StringUtils.isNotEmpty(cl.getPhylum())) {
                rank = RankType.PHYLUM;
                //construct the full scientific name from the parts
                name = cl.getPhylum();
            } else if (StringUtils.isNotEmpty(cl.getKingdom())) {
                rank = RankType.KINGDOM;
                //construct the full scientific name from the parts
                name = cl.getKingdom();
            }
            originalName = name;
        } else {
            //check to see if the rank can be determined by matching the scientific name to one of values
            if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getSubspecies()))
                rank = RankType.SUBSPECIES;
            else if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getSpecies()))
                rank = RankType.SPECIES;
            else if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getGenus()))
                rank = RankType.GENUS;
            else if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getFamily()))
                rank = RankType.FAMILY;
            else if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getOrder()))
                rank = RankType.ORDER;
            else if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getKlass()))
                rank = RankType.CLASS;
            else if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getPhylum()))
                rank = RankType.PHYLUM;
            else if (rank == null && StringUtils.equalsIgnoreCase(name, cl.getKingdom()))
                rank = RankType.KINGDOM;

            if (rank == null) {
                if (recursiveMatching) {
                    // "Genus sp" / "Genus sp." : drop the marker and treat the remainder as the genus
                    if (name.endsWith(" sp") || name.endsWith(" sp.")) {
                        name = name.substring(0, name.lastIndexOf(" "));
                        cl.setGenus(name);
                    }
                }
                //check to see if the rank can be determined from the scientific name
                try {
                    ParsedName<?> cn = parser.parse(name.replaceAll("\\?", ""));
                    if (cn != null && cn.type == NameType.doubtful) {
                        //if recursive set the issue
                        if (recursiveMatching) {
                            name = cn.genusOrAbove;
                            rank = RankType.GENUS;
                            metrics.setNameType(NameType.doubtful);

                        }
                    } else if (cn != null && cn.isBinomial()) {
                        //set the genus if it is empty
                        if (StringUtils.isEmpty(cl.getGenus()))
                            cl.setGenus(cn.genusOrAbove);
                        if (cn.rank == null && cn.cultivar == null && cn.isParsableType()) {

                            if (cn.getInfraSpecificEpithet() != null) {
                                rank = RankType.SUBSPECIES;
                                //populate the species if it is empty
                                if (StringUtils.isEmpty(cl.getSpecies()))
                                    cl.setSpecies(cn.genusOrAbove + " " + cn.specificEpithet);
                            } else
                                rank = RankType.SPECIES;
                        } else if (cn.cultivar != null) {
                            rank = RankType.CULTIVAR;
                        } else if (cn.rank != null) {
                            // It is not necessary to update the rank based on rank markers at this point
                            // This is because it is done at the lowest level possible just before the search is performed

                        }
                    }
                } catch (org.gbif.ecat.parser.UnparsableException e) {
                    //TODO log error maybe??
                    metrics.setNameType(e.type);
                }
            }

        }

        // the name is still null when the classification did not hold enough parts to build
        // one (or the parser produced no genus); there is nothing to search for directly
        if (name != null) {
            nsr = performErrorCheckSearch(name.replaceAll("\\?", ""), cl, rank, fuzzy, ignoreHomonym, metrics);
        }

        if (nsr == null && recursiveMatching) {
            //get the name type for the original name
            //remove the authorship from the search
            String authorship = cl.getAuthorship();
            cl.setAuthorship(null);
            if (name != null) {
                try {
                    ParsedName<?> pn = parser.parse(name);
                    metrics.setNameType(pn.getType());
                    // NOTE(review): 7000 is a RankType id threshold - presumably "species level or
                    // above"; confirm against the RankType ids
                    if (pn.type == NameType.doubtful || (rank != null && rank.getId() <= 7000) || rank == null)
                        nsr = performErrorCheckSearch(pn.getGenusOrAbove(), cl, null, fuzzy, ignoreHomonym,
                                metrics);
                } catch (Exception e) {
                    // best effort only: the remaining higher taxon searches are still attempted
                    log.debug("Unable to search on the genus or above part of [" + name + "]", e);
                }
            }

            if (nsr == null && rank != RankType.SPECIES
                    && ((StringUtils.isNotEmpty(cl.getSpecificEpithet())
                            && !isSpecificMarker(cl.getSpecificEpithet()))
                            || (StringUtils.isNotEmpty(cl.getSpecies()) && !isSpecificMarker(cl.getSpecies())))) {
                name = cl.getSpecies();
                if (StringUtils.isEmpty(name))
                    name = cl.getGenus() + " " + cl.getSpecificEpithet();

                nsr = performErrorCheckSearch(name, cl, RankType.SPECIES, fuzzy, ignoreHomonym, metrics);
            }
            if (nsr == null && cl.getGenus() != null) {
                nsr = performErrorCheckSearch(cl.getGenus(), cl, RankType.GENUS, fuzzy, ignoreHomonym, metrics);
            }
            if (nsr == null && cl.getFamily() != null) {
                nsr = performErrorCheckSearch(cl.getFamily(), cl, RankType.FAMILY, fuzzy, ignoreHomonym, metrics);
            }
            if (nsr == null && cl.getOrder() != null) {
                nsr = performErrorCheckSearch(cl.getOrder(), cl, RankType.ORDER, fuzzy, ignoreHomonym, metrics);
            }
            if (nsr == null && cl.getKlass() != null) {
                nsr = performErrorCheckSearch(cl.getKlass(), cl, RankType.CLASS, fuzzy, ignoreHomonym, metrics);
            }
            if (nsr == null && cl.getPhylum() != null) {
                nsr = performErrorCheckSearch(cl.getPhylum(), cl, RankType.PHYLUM, fuzzy, ignoreHomonym, metrics);
            }
            if (nsr == null && cl.getKingdom() != null) {
                nsr = performErrorCheckSearch(cl.getKingdom(), cl, RankType.KINGDOM, fuzzy, ignoreHomonym, metrics);
            }

            if (nsr != null) {
                nsr.setMatchType(MatchType.RECURSIVE);
            }
            //reset the author
            cl.setAuthorship(authorship);

        }

        //now start to get the metric object ready
        if (originalName != null) {
            if (metrics.getNameType() == null) {
                try {
                    ParsedName<?> pn = parser.parse(originalName);
                    if (pn != null) {
                        metrics.setNameType(pn.type);
                    }
                } catch (UnparsableException e) {
                    metrics.setNameType(e.type);
                }
            }

            checkOtherIssues(originalName, metrics);
        }
        if (nsr != null) {
            //Obtain and store the GUIDs for the classification identifiers
            if (addGuids)
                updateClassificationWithGUID(nsr.getRankClassification());
        }

        // an empty error set is reported as the single value NONE
        if (metrics.getErrors().size() == 0)
            metrics.getErrors().add(ErrorType.NONE);

        metrics.setResult(nsr);

        return metrics;
    }

    /**
     * Records the issues that can be detected from the text of the original name.
     * <p/>
     * A question mark adds QUESTION_SPECIES and forces the name type to doubtful; a full match of
     * cfPattern adds CONFER_SPECIES; a full match of affPattern adds AFFINITY_SPECIES.
     *
     * @param originalName the name as originally supplied or derived; must not be null
     * @param metrics      the metrics whose error set (and name type) are updated
     */
    private void checkOtherIssues(String originalName, MetricsResultDTO metrics) {
        boolean questioned = originalName.indexOf('?') >= 0;
        if (questioned) {
            metrics.getErrors().add(ErrorType.QUESTION_SPECIES);
            //a questionable species is always a doubtful name type
            metrics.setNameType(NameType.doubtful);
        }

        Matcher confer = cfPattern.matcher(originalName);
        if (confer.matches()) {
            metrics.getErrors().add(ErrorType.CONFER_SPECIES);
        }

        Matcher affinity = affPattern.matcher(originalName);
        if (affinity.matches()) {
            metrics.getErrors().add(ErrorType.AFFINITY_SPECIES);
        }
    }

    /**
     * Performs a search, converting any search exception into an entry in the supplied metrics.
     * <p/>
     * Every caught exception is stored as the last exception of the metrics and its error type is
     * added to the error set. Some exceptions still provide a usable result:
     * <ul>
     * <li>MisappliedException - the matched result, but only when a misapplied result is present</li>
     * <li>ParentSynonymChildException - the child result</li>
     * <li>ExcludedNameException - the non excluded name when there is one, else the excluded name</li>
     * </ul>
     * Any other SearchResultException yields null.
     *
     * @param name scientific name to search for
     * @param cl The classification to perform the match on
     * @param rank Rank to perform the match on , when null no specific rank
     * @param fuzzy When true allow fuzzy matching on scientific names
     * @param ignoreHomonym When true ignore the homonym exception if a single result is returned.
     * @param metrics The metrics for this search. Errors will be applied to this metric
     * @return the search result, a result recovered from an exception, or null
     */
    private NameSearchResult performErrorCheckSearch(String name, LinnaeanRankClassification cl, RankType rank,
            boolean fuzzy, boolean ignoreHomonym, MetricsResultDTO metrics) {
        try {
            return searchForRecord(name, cl, rank, fuzzy, ignoreHomonym);
        } catch (MisappliedException e) {
            metrics.setLastException(e);
            metrics.getErrors().add(e.errorType);
            return e.getMisappliedResult() != null ? e.getMatchedResult() : null;
        } catch (ParentSynonymChildException e) {
            metrics.setLastException(e);
            metrics.getErrors().add(e.errorType);
            return e.getChildResult();
        } catch (ExcludedNameException e) {
            metrics.setLastException(e);
            metrics.getErrors().add(e.errorType);
            if (e.getNonExcludedName() != null) {
                return e.getNonExcludedName();
            }
            return e.getExcludedName();
        } catch (SearchResultException e) {
            metrics.setLastException(e);
            metrics.getErrors().add(e.errorType);
            return null;
        }
    }

    /**
     * Tests whether the supplied value is the infraspecific marker "spp" or "spp." (case insensitive,
     * surrounding whitespace ignored).
     * <p/>
     * FIXME need to include other types of marker
     *
     * @param subspecies the value to test, may be null
     * @return true when the trimmed value is one of the recognised markers
     */
    private boolean isInfraSpecificMarker(String subspecies) {
        final String marker = StringUtils.trimToNull(subspecies);
        if (marker == null) {
            return false;
        }
        return "spp".equalsIgnoreCase(marker) || "spp.".equalsIgnoreCase(marker);
    }

    /**
     * Tests whether the supplied value is a species marker: "sp" or "sp." after trimming, or
     * "sp.nov." once all spaces have been removed (all comparisons are case insensitive).
     * <p/>
     * FIXME need to include other types of marker
     *
     * @param species the value to test, may be null
     * @return true when the value is one of the recognised markers
     */
    private boolean isSpecificMarker(String species) {
        final String marker = StringUtils.trimToNull(species);
        if (marker == null) {
            return false;
        }
        if ("sp".equalsIgnoreCase(marker)) {
            return true;
        }
        if ("sp.".equalsIgnoreCase(marker)) {
            return true;
        }
        //the "sp. nov." form is compared with every space stripped from the untrimmed value
        final String withoutSpaces = species.replace(" ", "");
        return "sp.nov.".equalsIgnoreCase(withoutSpaces);
    }

    /**
     * Searches for an LSID based on the supplied name, classification and rank without a fuzzy match.
     * <p/>
     * Delegates to the five argument overload with both of its boolean options switched off.
     *
     * @param name the scientific name to search for
     * @param cl the classification to perform the match on
     * @param rank the rank to perform the match on
     * @return the value returned by the five argument overload
     * @throws SearchResultException when the delegated search raises it
     */
    public String searchForLSID(String name, LinnaeanRankClassification cl, RankType rank)
            throws SearchResultException {
        final boolean disabled = false;
        return searchForLSID(name, cl, rank, disabled, disabled);
    }

    /**
     * Searches the index for the supplied name of the specified rank, without any classification.
     * Returns the first result, or null when there is no result.
     *
     * @param name the scientific name to search for
     * @param rank the rank to perform the match on
     * @param fuzzy look for a fuzzy match
     * @return the first matching result or null
     * @throws SearchResultException when the delegated search raises it
     */
    public NameSearchResult searchForRecord(String name, RankType rank, boolean fuzzy)
            throws SearchResultException {
        final LinnaeanRankClassification noClassification = null;
        return searchForRecord(name, noClassification, rank, fuzzy);
    }

    /**
     * Searches the index for the supplied name only: no rank is supplied and fuzzy matching is off.
     *
     * @param name the scientific name to search for
     * @return the first matching result or null
     * @throws SearchResultException when the delegated search raises it
     */
    public NameSearchResult searchForRecord(String name) throws SearchResultException {
        final RankType anyRank = null;
        final boolean fuzzy = false;
        return searchForRecord(name, anyRank, fuzzy);
    }

    /**
     * Searches the index for the supplied name and rank with fuzzy matching switched off.
     *
     * @param name the scientific name to search for
     * @param rank the rank to perform the match on
     * @return the first matching result or null
     * @throws SearchResultException when the delegated search raises it
     */
    public NameSearchResult searchForRecord(String name, RankType rank) throws SearchResultException {
        final boolean fuzzy = false;
        return searchForRecord(name, rank, fuzzy);
    }

    /**
     * Returns the accepted LSID for the supplied classification, with homonym ignoring switched off.
     * <p/>
     * If a synonym is matched the accepted LSID is retrieved and returned.
     * <p/>
     * The default error handling is used.  For example matches to excluded concepts are permitted.
     * <p/>
     * Do not use this method if you would like control over how error conditions are handled.
     *
     * @param cl the classification to match; its scientific name is the name searched for
     * @param fuzzy when true allow fuzzy matching
     * @return the accepted LSID or null when nothing was matched
     */
    public String searchForAcceptedLsidDefaultHandling(LinnaeanRankClassification cl, boolean fuzzy) {
        final boolean ignoreHomonyms = false;
        return searchForAcceptedLsidDefaultHandling(cl, fuzzy, ignoreHomonyms);
    }

    /**
     * Returns the LSID of the accepted record found for the supplied classification using the
     * default error handling.
     *
     * @param cl the classification to match
     * @param fuzzy when true allow fuzzy matching
     * @param ignoreHomonyms passed through to the accepted record search
     * @return the LSID of the accepted record or null when no record was found
     */
    public String searchForAcceptedLsidDefaultHandling(LinnaeanRankClassification cl, boolean fuzzy,
            boolean ignoreHomonyms) {
        final NameSearchResult accepted = searchForAcceptedRecordDefaultHandling(cl, fuzzy, ignoreHomonyms);
        return accepted == null ? null : accepted.getLsid();
    }

    /**
     * Returns the accepted result for the supplied classification, with homonym ignoring switched off.
     * <p/>
     * If a synonym is matched the accepted result is retrieved and returned.
     * <p/>
     * The default error handling is used.  For example matches to excluded concepts are permitted.
     * <p/>
     * Do not use this method if you would like control over how error conditions are handled.
     *
     * @param cl the classification to match; its scientific name is the name searched for
     * @param fuzzy when true allow fuzzy matching
     * @return the accepted result or null when nothing was matched
     */
    public NameSearchResult searchForAcceptedRecordDefaultHandling(LinnaeanRankClassification cl, boolean fuzzy) {
        final boolean ignoreHomonym = false;
        return searchForAcceptedRecordDefaultHandling(cl, fuzzy, ignoreHomonym);
    }

    /**
     * Searches for the scientific name of the supplied classification and resolves the match to its
     * accepted record, applying the default handling to every search exception:
     * <ul>
     * <li> misapplied: use the matched result, but only when a misapplied result is also present </li>
     * <li> parent synonym child: use the child result </li>
     * <li> excluded: use the non excluded name when available, otherwise the excluded name </li>
     * <li> any other search exception: no result </li>
     * </ul>
     * When the resulting match is a synonym the record for its accepted LSID is looked up and returned instead.
     *
     * @param cl the classification to match; its scientific name is the name searched for
     * @param fuzzy when true allow fuzzy matching
     * @param ignoreHomonym passed through to the record search
     * @return the accepted record or null when nothing usable was matched
     */
    public NameSearchResult searchForAcceptedRecordDefaultHandling(LinnaeanRankClassification cl, boolean fuzzy,
            boolean ignoreHomonym) {
        NameSearchResult match;
        try {
            final RankType anyRank = null;
            match = searchForRecord(cl.getScientificName(), cl, anyRank, fuzzy, ignoreHomonym);
        } catch (MisappliedException misapplied) {
            match = misapplied.getMisappliedResult() == null ? null : misapplied.getMatchedResult();
        } catch (ParentSynonymChildException split) {
            match = split.getChildResult();
        } catch (ExcludedNameException excluded) {
            if (excluded.getNonExcludedName() != null) {
                match = excluded.getNonExcludedName();
            } else {
                match = excluded.getExcludedName();
            }
        } catch (SearchResultException ignored) {
            //default handling: every other search problem simply means "no result"
            match = null;
        }

        //only synonyms need to be resolved to their accepted concept
        if (match == null || !match.isSynonym()) {
            return match;
        }
        return searchForRecordByLsid(match.getAcceptedLsid());
    }

    /**
     * Searches for a record based on the supplied name and rank without fuzzy matching.  The kingdom and
     * genus are wrapped in a classification that is handed to the classification based search.
     *
     * @param name the scientific name to search for
     * @param kingdom the kingdom used to build the classification
     * @param genus the genus used to build the classification
     * @param rank the rank to perform the match on
     * @return the first matching result or null
     * @throws SearchResultException when the delegated search raises it
     * @deprecated Use {@link #searchForRecord(java.lang.String, au.org.ala.names.model.LinnaeanRankClassification, au.org.ala.names.model.RankType, boolean)} instead.
     * It is more extensible to supply a classification object than a list of higher classification
     */
    @Deprecated
    public NameSearchResult searchForRecord(String name, String kingdom, String genus, RankType rank)
            throws SearchResultException {
        final LinnaeanRankClassification classification = new LinnaeanRankClassification(kingdom, genus);
        final boolean fuzzy = false;
        return searchForRecord(name, classification, rank, fuzzy);
    }

    /**
     * Searches for a record based on the supplied name, classification and rank, with or without
     * fuzzy matching and with homonym ignoring switched off.
     *
     * @param name the scientific name to search for
     * @param cl the classification to perform the match on
     * @param rank the rank to perform the match on
     * @param fuzzy when true allow fuzzy matching
     * @return the first matching result or null
     * @throws SearchResultException when the delegated search raises it
     */
    public NameSearchResult searchForRecord(String name, LinnaeanRankClassification cl, RankType rank,
            boolean fuzzy) throws SearchResultException {
        final boolean ignoreHomonyms = false;
        return searchForRecord(name, cl, rank, fuzzy, ignoreHomonyms);
    }

    /**
     * Searches for a record based on the supplied name, rank and classification
     * with or without fuzzy name matching, and returns the first of the results.
     *
     * @param name the scientific name to search for
     * @param cl the classification to perform the match on
     * @param rank the rank to perform the match on
     * @param fuzzy when true allow fuzzy matching
     * @param ignoreHomonyms passed through to the multi record search
     * @return the first result, or null when the search produced no results
     * @throws SearchResultException when the delegated search raises it
     */
    public NameSearchResult searchForRecord(String name, LinnaeanRankClassification cl, RankType rank,
            boolean fuzzy, boolean ignoreHomonyms) throws SearchResultException {
        //search for more than 1 term in case homonym resolution takes place at a lower level??
        final int candidateLimit = 10;
        final List<NameSearchResult> candidates = searchForRecords(name, rank, cl, candidateLimit, fuzzy,
                ignoreHomonyms);
        if (candidates == null || candidates.isEmpty()) {
            return null;
        }
        return candidates.get(0);
    }

    /**
     * Searches for a record based on the supplied name, classification and rank with fuzzy
     * name matching switched off.
     *
     * @param name the scientific name to search for
     * @param cl the classification to perform the match on
     * @param rank the rank to perform the match on
     * @return the first matching result or null
     * @throws SearchResultException when the delegated search raises it
     */
    public NameSearchResult searchForRecord(String name, LinnaeanRankClassification cl, RankType rank)
            throws SearchResultException {
        final boolean fuzzy = false;
        return searchForRecord(name, cl, rank, fuzzy);
    }

    /**
     * Returns the record that has the supplied checklist bank id.
     * <p/>
     * The search is performed on the ID field for at most one result and without homonym checks.
     * A match has its match type set to {@link MatchType#TAXON_ID}.
     *
     * @param id the checklist bank id to look up
     * @return the matching record, or null when there is no match, the searcher is not
     * available or the search failed
     */
    public NameSearchResult searchForRecordByID(String id) {
        try {
            List<NameSearchResult> results = performSearch(ALANameIndexer.IndexField.ID.toString(), id, null, null,
                    1, null, false, idParser.get());
            //performSearch returns null when the searcher has not been initialised
            if (results != null && !results.isEmpty()) {
                NameSearchResult match = results.get(0);
                match.setMatchType(MatchType.TAXON_ID);
                return match;
            }
        } catch (SearchResultException e) {
            //this should not happen as we are not checking for homonyms
            //homonyms should only be checked if a search is being performed by name
            log.debug("Unexpected search exception while looking up id " + id, e);
        } catch (IOException e) {
            //keep the best-effort contract (return null) but do not hide the index problem
            log.warn("Unable to search for id " + id + ". " + e.getMessage(), e);
        }
        return null;
    }

    /**
     * Gets the LSID for the record that has the supplied checklist bank id.  The accepted LSID of
     * the record is preferred; the record's own LSID is used when it has no accepted LSID.
     *
     * @param id the checklist bank id to look up
     * @return the LSID, or null when no record exists for the id
     */
    public String searchForLsidById(String id) {
        final NameSearchResult found = searchForRecordByID(id);
        if (found == null) {
            return null;
        }
        if (found.getAcceptedLsid() != null) {
            return found.getAcceptedLsid();
        }
        return found.getLsid();
    }

    /**
     * Searches for records with the specified name and rank with or without fuzzy name matching.
     * No classification is supplied and at most 10 results are requested.
     *
     * @param name the scientific name to search for
     * @param rank the rank to perform the match on
     * @param fuzzy search for a fuzzy match
     * @return the matching results as returned by the delegated search
     * @throws SearchResultException when the delegated search raises it
     */
    public List<NameSearchResult> searchForRecords(String name, RankType rank, boolean fuzzy)
            throws SearchResultException {
        final LinnaeanRankClassification noClassification = null;
        final int maxResults = 10;
        return searchForRecords(name, rank, noClassification, maxResults, fuzzy);
    }

    /**
     * Searches for a list of results for the supplied name, classification and rank with fuzzy
     * matching switched off.
     *
     * @param name the scientific name to search for
     * @param rank the rank to perform the match on
     * @param cl the classification to perform the match on
     * @param max the maximum number of results to return
     * @return the matching results as returned by the delegated search
     * @throws SearchResultException when the delegated search raises it
     */
    public List<NameSearchResult> searchForRecords(String name, RankType rank, LinnaeanRankClassification cl,
            int max) throws SearchResultException {
        final boolean fuzzy = false;
        return searchForRecords(name, rank, cl, max, fuzzy);
    }

    /**
     * Searches for the records that satisfy the given conditions using the algorithm
     * outlined in the class description.  The clean option of the underlying search is switched on
     * and homonym ignoring is switched off.
     *
     * @param name scientific name to search for
     * @param rank Rank to perform the match on, when null no specific rank
     * @param cl The high taxa that form the classification for the search item
     * @param max The maximum number of results to return
     * @param fuzzy search for a fuzzy match
     * @return the matching results as returned by the underlying search
     * @throws SearchResultException when the underlying search raises it
     */
    public List<NameSearchResult> searchForRecords(String name, RankType rank, LinnaeanRankClassification cl,
            int max, boolean fuzzy) throws SearchResultException {
        final boolean clean = true;
        final boolean ignoreHomonym = false;
        return searchForRecords(name, rank, cl, max, fuzzy, clean, ignoreHomonym);
    }

    /**
     * Searches for the records that satisfy the given conditions, letting the caller decide whether
     * homonyms are ignored.  The clean option of the underlying search is switched on.
     *
     * @param name scientific name to search for
     * @param rank Rank to perform the match on, when null no specific rank
     * @param cl The high taxa that form the classification for the search item
     * @param max The maximum number of results to return
     * @param fuzzy search for a fuzzy match
     * @param ignoreHomonyms When true ignore the homonym exception if a single result is returned.
     * @return the matching results as returned by the underlying search
     * @throws SearchResultException when the underlying search raises it
     */
    public List<NameSearchResult> searchForRecords(String name, RankType rank, LinnaeanRankClassification cl,
            int max, boolean fuzzy, boolean ignoreHomonyms) throws SearchResultException {
        final boolean clean = true;
        return searchForRecords(name, rank, cl, max, fuzzy, clean, ignoreHomonyms);
    }

    /**
     * The implementation of the name search (as of December 2011) that backs the public
     * {@code searchForRecords} overloads.  The strategies below are tried in order and the hits of the
     * first strategy that finds something are returned:
     * <ol>
     * <li> Exact string match of the scientific name (with homonym checks). </li>
     * <li> Phrase name match on genus, phrase, voucher and optionally specific epithet, when the parser
     * produced an {@link ALAParsedName}. </li>
     * <li> Canonical string match when the parsed name is a parsable type with parsed authors and is neither
     * informal nor doubtful.  A cultivar that fails the canonical match is retried as a phrase name. </li>
     * <li> Sounds like match on genus, specific epithet and optionally infra specific epithet, only when
     * {@code fuzzy} is set and the parsed name is a binomial that is neither informal nor doubtful. </li>
     * </ol>
     * Before any strategy runs the virus "stop" words are replaced by spaces in the name, and names containing
     * "spp." are rejected.
     *
     * @param name scientific name to search for, must not be null
     * @param rank Rank to perform the match on, when null no specific rank
     * @param cl The classification used for the search; for the canonical match a missing classification is
     * created and a missing authorship is filled in from the parsed name
     * @param max The maximum number of results to return
     * @param fuzzy When true the sounds like match is attempted as a last resort
     * @param clean not referenced in the body of this method
     * @param ignoreHomonym When true ignore the homonym exception if a single result is returned.
     * @return the hits of the first strategy that matched; null when nothing matched, when the first
     * search returned null (searcher not initialised) or when an IOException occurred
     * @throws SearchResultException when the name is null or a rank marker, when the name contains "spp."
     * (an SPPException), or when one of the searches raises it; a HomonymException is
     * rethrown unless {@code ignoreHomonym} is set and the exception carries exactly one result
     */
    private List<NameSearchResult> searchForRecords(String name, RankType rank, LinnaeanRankClassification cl,
            int max, boolean fuzzy, boolean clean, boolean ignoreHomonym) throws SearchResultException {
        //The name is not allowed to be null

        //Check for null name before attempting to do anything else
        if (name == null)
            throw new SearchResultException("Unable to perform search. Null value supplied for the name.");
        //Check that the scientific name supplied is NOT a rank marker.
        if (PhraseNameParser.RANK_MARKER.matcher(name).matches())
            throw new SearchResultException("Supplied scientific name is a rank marker.");

        //remove all the "stop" words from the scientific name
        //any failure here is only logged; the search then continues with the unmodified name
        try {
            name = virusStopPattern.matcher(name).replaceAll(" ").trim();
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }

        //According to http://en.wikipedia.org/wiki/Species spp. is used as follows:
        //The authors use "spp." as a short way of saying that something applies to many species within a genus,
        //but do not wish to say that it applies to all species within that genus.
        //Thus we don't want to attempt to match on spp.
        if (name.contains("spp."))
            throw new SPPException();//SearchResultException("Unable to perform search. Can not match to a subset of species within a genus.");

        try {
            //nameType is assigned below but not read again in this method
            NameType nameType = null;
            ParsedName<?> pn = null;
            try {
                pn = parser.parse(name);
                nameType = pn != null ? pn.getType() : null;
            } catch (UnparsableException e) {
                //an unparsable name can still be matched exactly; pn stays null and the parsed strategies are skipped
                log.warn("Unable to parse " + name + ". " + e.getMessage());
            }
            //Check for the exact match
            List<NameSearchResult> hits = performSearch(NameIndexField.NAME.toString(), name, rank, cl, max,
                    MatchType.EXACT, true, queryParser.get());
            if (hits == null) // situation where searcher has not been initialised
            {
                return null;
            }
            if (hits.size() > 0) {
                return hits;
            }

            //Use the parsed name and see what type of check to do next
            //at this point we don't want to match informal names
            //if(pn.getType() == NameType.informal)
            //    throw new InformalNameException();

            if (pn instanceof ALAParsedName) {
                //check the phrase name
                ALAParsedName alapn = (ALAParsedName) pn;
                String genus = alapn.getGenusOrAbove();
                String phrase = alapn.cleanPhrase;//alapn.getLocationPhraseDesciption();
                String voucher = alapn.cleanVoucher;
                //String voucher = alapn.phraseVoucher != null ? voucherRemovePattern.matcher(alapn.phraseVoucher).replaceAll("") :null;
                //the specific epithet is left out of the search when the rank marker of the parsed name is "sp."
                String specific = alapn.rank != null && alapn.rank.equals("sp.") ? null : alapn.specificEpithet;
                String[][] searchFields = new String[4][];
                searchFields[0] = new String[] { RankType.GENUS.getRank(), genus };
                searchFields[1] = new String[] { NameIndexField.PHRASE.toString(), phrase };
                searchFields[2] = new String[] { NameIndexField.VOUCHER.toString(), voucher };
                searchFields[3] = new String[] { NameIndexField.SPECIFIC.toString(), specific };
                hits = performSearch(searchFields, rank, cl, max, MatchType.PHRASE, false, queryParser.get()); //don't want to check for homonyms yet...
                if (hits.size() == 1) {
                    return hits;
                } else if (hits.size() > 1) {
                    //this represents a homonym issue between vouchers.
                    //don't throw a homonym if all results point to the same accepted concept
                    NameSearchResult commonAccepted = getCommonAccepetedConcept(hits);
                    if (commonAccepted != null) {
                        //replace all the hits with the single shared accepted concept
                        hits.removeAll(hits);
                        hits.add(commonAccepted);
                        return hits;
                    }
                    throw new HomonymException(hits);
                }
            } else if (pn != null && pn.isParsableType() && pn.authorsParsed && pn.getType() != NameType.informal
                    && pn.getType() != NameType.doubtful) {
                //check the canonical name
                String canonicalName = pn.canonicalName();
                if (cl == null) {
                    cl = new LinnaeanRankClassification();
                }
                //set the authorship if it has been supplied as part of the scientific name
                if (cl.getAuthorship() == null) {
                    cl.setAuthorship(pn.authorshipComplete());
                }
                hits = performSearch(ALANameIndexer.IndexField.NAME.toString(), canonicalName, rank, cl, max,
                        MatchType.CANONICAL, true, queryParser.get());
                if (hits.size() > 0) {
                    return hits;
                }
                //if the parse type was a cultivar and we didn't match it check to see if we can match as a phrase name
                if (pn.getType() == NameType.cultivar) {
                    String genus = pn.getGenusOrAbove();
                    String phrase = pn.getCultivar();
                    String voucher = null;
                    String specific = pn.rank != null && pn.rank.equals("sp.") ? null : pn.getSpecificEpithet();
                    String[][] searchFields = new String[4][];
                    searchFields[0] = new String[] { RankType.GENUS.getRank(), genus };
                    searchFields[1] = new String[] { NameIndexField.PHRASE.toString(), phrase };
                    searchFields[2] = new String[] { NameIndexField.VOUCHER.toString(), voucher };
                    searchFields[3] = new String[] { NameIndexField.SPECIFIC.toString(), specific };
                    hits = performSearch(searchFields, rank, cl, max, MatchType.PHRASE, false, queryParser.get());
                    if (hits.size() > 0) {
                        return hits;
                    }
                }
            }
            //now check for a "sounds like" match if we don't have an informal name
            if (pn != null && fuzzy && pn.isBinomial() && pn.getType() != NameType.informal
                    && pn.getType() != NameType.doubtful) {
                String genus = TaxonNameSoundEx.treatWord(pn.genusOrAbove, "genus");
                String specific = TaxonNameSoundEx.treatWord(pn.specificEpithet, "species");
                String infra = pn.infraSpecificEpithet == null ? null
                        : TaxonNameSoundEx.treatWord(pn.infraSpecificEpithet, "species");
                String[][] searchFields = new String[3][];
                searchFields[0] = new String[] { NameIndexField.GENUS_EX.toString(), genus };
                searchFields[1] = new String[] { NameIndexField.SPECIES_EX.toString(), specific };
                if (StringUtils.isNotEmpty(infra)) {
                    searchFields[2] = new String[] { NameIndexField.INFRA_EX.toString(), infra };
                } else {
                    //no infra specific epithet: search for the literal "<null>" value in the infra sounds like field
                    searchFields[2] = new String[] { NameIndexField.INFRA_EX.toString(), "<null>" };
                }
                hits = performSearch(searchFields, rank, cl, max, MatchType.SOUNDEX, false, queryParser.get()); //don't want to check for homonyms yet...
                if (hits.size() > 0) {
                    return hits;
                }

            }

            //none of the strategies produced a match
            return null;
        } catch (HomonymException e) {
            //a homonym is only tolerated on request and only when the exception carries a single result
            if (ignoreHomonym && e.getResults().size() == 1) {
                return e.getResults();
            } else {
                throw e;
            }
        } catch (IOException e) {
            //index access problems are logged and reported to the caller as "no result"
            log.warn(e.getMessage());
            return null;
        }

    }

    /**
     * Resolves a list of hits to the single accepted concept they all point to.
     * <p/>
     * The record for the shared accepted LSID is returned only when every hit has an accepted LSID
     * and all of them are equal.  In every other case null is returned.
     *
     * @param hits the hits to inspect; the first element is read unconditionally
     * @return the record of the common accepted concept or null
     */
    private NameSearchResult getCommonAccepetedConcept(List<NameSearchResult> hits) {
        final String sharedLsid = hits.get(0).getAcceptedLsid();

        for (NameSearchResult candidate : hits) {
            final String candidateLsid = candidate.getAcceptedLsid();
            final boolean sameConcept = candidateLsid != null && candidateLsid.equals(sharedLsid);
            if (!sameConcept) {
                return null;
            }
        }
        return sharedLsid == null ? null : searchForRecordByLsid(sharedLsid);
    }

    /**
     * Intended to check whether the "soundex" matched results are ambiguous.
     * <p/>
     * The check is not implemented: whatever the hits are, false is returned.
     *
     * @param hits the sounds like hits
     * @return always false
     */
    private boolean areMatchesAmbiguous(List<NameSearchResult> hits) {
        final boolean multipleHits = hits.size() > 1;
        if (multipleHits) {
            //TODO not ambiguous if all records have the same accepted_lsid
        }
        return false;
    }

    /**
     * Update the rank for the name based on it containing rank strings.
     * Provides a bit of a sanity check on the name matching.  If we expect a
     * species we don't want to match on a genus.
     * <p/>
     * The first rank marker found in the name is used.  When it ends with a "." the marker, minus its
     * first character and the trailing ".", is looked up in the rank marker map and the resulting rank
     * replaces the supplied one.
     *
     * @param name the scientific name to inspect
     * @param rank the rank to fall back to
     * @return the rank derived from the marker, or the supplied rank when the name contains no usable marker
     */
    private RankType getUpdatedRank(String name, RankType rank) {
        final Matcher markerMatcher = RANK_MARKER.matcher(name);
        if (!markerMatcher.find()) {
            return rank;
        }

        final String marker = markerMatcher.group();
        log.debug("Changing rank to : " + marker);
        RankType updated = rank;
        if (marker.endsWith(".")) {
            final String markerKey = marker.substring(1, marker.length() - 1);
            updated = RankType.getForCBRank(Rank.RANK_MARKER_MAP.get(markerKey));
        }
        log.debug("Using the new rank " + updated);
        return updated;
    }

    /**
     * Placeholder for a check of whether the supplied name is a synonym. A synonym will not have
     * an associated kingdom and genus in the index.
     * <p/>
     * The method is not implemented: the body contains only the outline of the intended steps and
     * none of the arguments are used.
     *
     * @param name the scientific name (unused)
     * @param rank the rank of the name (unused)
     * @param kingdom the kingdom of the name (unused)
     * @param genus the genus of the name (unused)
     * @param max the maximum number of results (unused)
     * @throws SearchResultException declared but never thrown by the current body
     */
    private void checkForSynonym(String name, RankType rank, String kingdom, String genus, int max)
            throws SearchResultException {
        //Intended steps (not implemented):
        //search on name field with name and empty kingdom and genus
        //search on the alternative names field with name and empty kingdom and genus
        //if we get a match that is a synonym verify match against IRMNG
    }

    /**
     * Placeholder for a synonym match check.  The current body ignores all of its arguments.
     *
     * @param name the scientific name (unused)
     * @param rank the rank of the name (unused)
     * @param kingdom the kingdom of the name (unused)
     * @param genus the genus of the name (unused)
     * @return always false
     */
    private boolean doesSynonymMatch(String name, RankType rank, String kingdom, String genus) {
        return false;
    }

    /**
     * Convenience form of the index search for a single compulsory field: the field and value are
     * wrapped into a one row field/value array and handed to the array based search.
     *
     * @param field the index field to search on
     * @param value the value the field must have
     * @param rank Optional rank of the value
     * @param cl The high taxa that form the classification for the search item
     * @param max The maximum number of results to return
     * @param type The type of search that is being performed
     * @param checkHomo Whether or not the result should check for homonyms.
     * @param parser the query parser used to parse the generated query
     * @return the results of the array based search
     * @throws IOException when the array based search raises it
     * @throws SearchResultException when the array based search raises it
     */
    private List<NameSearchResult> performSearch(String field, String value, RankType rank,
            LinnaeanRankClassification cl, int max, MatchType type, boolean checkHomo, QueryParser parser)
            throws IOException, SearchResultException {
        final String[][] singleField = new String[][] { { field, value } };
        return performSearch(singleField, rank, cl, max, type, checkHomo, parser);
    }

    /**
     * Performs an index search based on the supplied field/value pairs.
     * <p/>
     * A query string is assembled from the compulsory values (pairs with a null value are skipped), an
     * optional rank restriction and the search string of the classification, parsed with the supplied
     * parser and run against the searcher.  Every hit is turned into a {@link NameSearchResult} of the given match type.
     * <p/>
     * When {@code checkHomo} is set the results are then checked, in this order, for excluded names, a
     * species split, misapplied names, cross rank homonyms (only when no rank was supplied) and
     * unresolved genus or species homonyms.  Each of these checks may end the search with an exception.
     *
     * @param compulsoryValues 2D array of field and value mappings to perform the search on
     * @param rank      Optional rank of the value
     * @param cl        The high taxa that form the classification for the search item
     * @param max       The maximum number of results to return
     * @param type      The type of search that is being performed
     * @param checkHomo Whether or not the result should check for homonyms.
     * @param parser    The query parser used to parse the generated query string
     * @return the results of the search, or null when the searcher is not available
     * @throws IOException when the index cannot be read
     * @throws SearchResultException when the generated query cannot be parsed or one of the result checks fails
     */

    private List<NameSearchResult> performSearch(String[][] compulsoryValues, RankType rank,
            LinnaeanRankClassification cl, int max, MatchType type, boolean checkHomo, QueryParser parser)
            throws IOException, SearchResultException {
        if (cbSearcher != null) {
            //remembers the value searched for in the NAME field; it is handed to the homonym validation below
            String scientificName = null;
            StringBuilder query = new StringBuilder();
            for (String[] values : compulsoryValues) {
                if (values[1] != null) {

                    //each non null value becomes a compulsory quoted clause
                    //NOTE(review): the value is not escaped, so a value containing a double quote will
                    //presumably break the query syntax - confirm whether callers can supply such values
                    query.append("+" + values[0] + ":\"" + values[1] + "\"");

                    if (values[0].equals(NameIndexField.NAME.toString()))
                        scientificName = values[1];
                }

            }

            if (rank != null) {
                //if the rank is below species include all names that are species level and below in case synonyms have changed ranks.
                query.append("+(");
                if (rank.getId() >= RankType.SPECIES.getId()) {
                    query.append(NameIndexField.RANK_ID.toString()).append(":[7000 TO 9999]");

                } else
                    query.append(NameIndexField.RANK.toString() + ":" + rank.getRank());
                //cater for the situation where the search term could be a synonym that does not have a rank
                // also ALA added concepts do NOT have ranks.
                query.append(" OR ").append(NameIndexField.iS_SYNONYM.toString()).append(":T OR ")
                        .append(NameIndexField.ALA).append(":T)");

            }
            if (cl != null) {
                query.append(cl.getLuceneSearchString(true));

            }

            try {

                TopDocs hits = cbSearcher.search(parser.parse(query.toString()), max);//cbSearcher.search(boolQuery, max);

                //now put the hits into the list of NameSearchResult
                List<NameSearchResult> results = new java.util.ArrayList<NameSearchResult>();

                for (ScoreDoc sdoc : hits.scoreDocs) {
                    NameSearchResult nsr = new NameSearchResult(cbReader.document(sdoc.doc), type);
                    results.add(nsr);
                }

                //HOMONYM CHECKS and other checks
                if (checkHomo) {

                    //check to see if one of the results is excluded
                    if (results.size() > 0) {
                        int exclCount = 0;
                        //the first result that is not excluded and the last result that is excluded
                        NameSearchResult notExcludedResult = null;
                        NameSearchResult excludedResult = null;
                        for (NameSearchResult nsr : results) {
                            if (nsr.getSynonymType() == au.org.ala.names.model.SynonymType.EXCLUDES) {
                                exclCount++;
                                excludedResult = nsr;
                            } else if (notExcludedResult == null) {
                                notExcludedResult = nsr;
                            }
                        }
                        if (exclCount > 0) {
                            //throw the basic exception if count == result size
                            if (exclCount == results.size()) {
                                throw new ExcludedNameException(
                                        "The result is a name that has been excluded from the NSL", excludedResult);
                            } else if (notExcludedResult != null) {
                                //one of the results was an excluded concept
                                throw new ExcludedNameException(
                                        "One of the results was excluded.  Use the nonExcludedName for your match.",
                                        notExcludedResult, excludedResult);
                            }
                        }
                    }

                    //check to see if we have a situation where a species has been split into subspecies and a synonym exists to the subspecies
                    checkForSpeciesSplit(results);

                    //check to see if one of the results is a misapplied synonym
                    checkForMisapplied(results);

                    //check result level homonyms
                    //TODO 2012-04-17: Work out edge case issues for canonical matches...
                    //checkResultLevelHomonym(results);

                    //check to see if we have a cross rank homonym
                    //cross rank homonyms are resolvable if a rank has been supplied
                    if (rank == null) {
                        checkForCrossRankHomonym(results);
                    }

                    //check to see if the search criteria could represent an unresolved genus or species homonym
                    if (results.size() > 0) {
                        RankType resRank = results.get(0).getRank();
                        if ((resRank == RankType.GENUS || resRank == RankType.SPECIES)
                                || (results.get(0).isSynonym()
                                        && (rank == null || rank == RankType.GENUS || rank == RankType.SPECIES))) {
                            //validate by author when an authorship is available, otherwise by classification;
                            //the whole result list is then replaced by the single validated result
                            //NOTE(review): if the validation can return null the list ends up holding a null element - confirm
                            NameSearchResult result = (cl != null && StringUtils.isNotBlank(cl.getAuthorship()))
                                    ? validateHomonymByAuthor(results, scientificName, cl)
                                    : validateHomonyms(results, scientificName, cl);
                            results.clear();
                            results.add(result);
                        }
                    }
                }

                return results;
            } catch (ParseException e) {
                //report an unparsable query as a search problem together with the offending query string
                throw new SearchResultException("Error parsing " + query.toString() + "." + e.getMessage());
            }

        }
        //the searcher has not been initialised
        return null;
    }

    /**
     * Checks a list of results for result level homonyms.
     * <p/>
     * Only synonyms and results with a rank id of 7000 or above are considered.  Consecutive considered
     * results are homonyms when they point to different accepted concepts and either share a kingdom or
     * one of the two is a synonym.  Results in different kingdoms are not homonyms.
     * <p/>
     * The kingdom of the previously considered result is now remembered between iterations.  Previously
     * it was never updated, so every kingdom was compared against the empty string.
     *
     * @param results the results to check; nothing is checked for fewer than two results
     * @throws HomonymException when two considered results represent a result level homonym
     */
    private void checkResultLevelHomonym(List<NameSearchResult> results) throws HomonymException {
        //They are result level homonyms if multiple records and they don't all point to the same accepted concept...
        //They are not homonyms if they have different Kingdoms...
        if (results.size() > 1) {
            String lastAcceptedLsid = "";
            String lastKingdom = "";
            boolean lastWasSyn = false;
            for (NameSearchResult result : results) {
                if (result.isSynonym() || result.getRank().getId() >= 7000) {
                    //a synonym is compared through its accepted concept, everything else through its own LSID
                    String accepted = result.isSynonym() ? result.getAcceptedLsid() : result.getLsid();
                    String kingdom = result.getRankClassification().getKingdom() == null ? ""
                            : result.getRankClassification().getKingdom();
                    //nothing to compare against until the first considered result has been recorded
                    if (lastAcceptedLsid.length() > 0) {
                        if (!lastAcceptedLsid.equals(accepted)
                                && (lastKingdom.equals(kingdom) || lastWasSyn || result.isSynonym())) {
                            throw new HomonymException(accepted, results);
                        }
                    }
                    lastAcceptedLsid = accepted;
                    lastKingdom = kingdom;
                    lastWasSyn = result.isSynonym();
                }
            }
        }

    }

    /**
     * Uses the Levenshtein distance between 2 strings to determine whether or not the
     * 2 strings are a close match.  The distance is only computed when both strings are present and
     * their lengths differ by no more than the allowed amount.
     *
     * @param s1 the first string, may be null
     * @param s2 the second string, may be null
     * @param maxLengthDif The maximum difference in length that the 2 strings can have
     * @param maxDist      The maximum distance between the 2 strings
     * @return true when the strings are within the allowed length difference and distance
     */
    private boolean isCloseMatch(String s1, String s2, int maxLengthDif, int maxDist) {
        if (s1 == null || s2 == null) {
            return false;
        }
        final int lengthGap = Math.abs(s1.length() - s2.length());
        if (lengthGap > maxLengthDif) {
            return false;
        }
        //the lengths are close enough, so compare the edit distance
        final int distance = StringUtils.getLevenshteinDistance(s1, s2);
        return distance <= maxDist;
    }

    /**
     * Looks at the first two results for a misapplied name and reports it via an exception.
     * <p/>
     * Two situations are reported:
     * <ul>
     * <li>the first result is itself a misapplied synonym</li>
     * <li>the first result is not a synonym and the second result is a misapplied synonym</li>
     * </ul>
     * In both cases the record the misapplied name points to is looked up by its accepted LSID.
     *
     * @param results the search results to inspect; nothing happens when the list is empty
     * @throws MisappliedException when one of the situations above is detected
     */
    private void checkForMisapplied(List<NameSearchResult> results) throws MisappliedException {
        if (results.size() < 1) {
            return;
        }
        NameSearchResult head = results.get(0);
        NameSearchResult next = results.size() > 1 ? results.get(1) : null;

        if (head.getSynonymType() == au.org.ala.names.model.SynonymType.MISAPPLIED) {
            //the first result is misapplied
            NameSearchResult acceptedForHead = searchForRecordByLsid(head.getAcceptedLsid());
            throw new MisappliedException(acceptedForHead);
        }

        boolean acceptedFollowedByMisapplied = !head.isSynonym()
                && next != null
                && next.getSynonymType() == au.org.ala.names.model.SynonymType.MISAPPLIED;
        if (acceptedFollowedByMisapplied) {
            //the name matches an accepted concept but is also misapplied to another concept
            NameSearchResult acceptedForNext = searchForRecordByLsid(next.getAcceptedLsid());
            throw new MisappliedException(head, acceptedForNext);
        }
    }

    /**
     * Detects the situation where a name matches one accepted concept and is also a
     * synonym of a concept that sits underneath that accepted concept.
     * <p/>
     * Two shapes of result list are handled:
     * <ul>
     * <li>exactly 2 results: one synonym and one accepted result of rank species</li>
     * <li>more than 2 results: exactly one accepted result, with every synonym pointing to the
     * same accepted LSID</li>
     * </ul>
     * In either case the concept that the synonym(s) point to is looked up, and the exception is
     * raised when its left value lies strictly between the left and right values of the accepted result.
     * <p/>
     * Nothing is reported when the synonym's accepted concept cannot be found in the index or when
     * any of the required left/right values are missing.
     *
     * @param results the search results to inspect
     * @throws ParentSynonymChildException when the synonym's accepted concept is positioned inside the accepted result
     */
    private void checkForSpeciesSplit(List<NameSearchResult> results) throws ParentSynonymChildException {
        if (results.size() == 2) {
            //very specific situation - there will be 2 results one accepted and the other a synonym to a child of the accepted name
            NameSearchResult first = results.get(0);
            NameSearchResult second = results.get(1);
            //exactly one of the two results must be a synonym
            if (first.isSynonym() == second.isSynonym())
                return;
            NameSearchResult synResult = first.isSynonym() ? first : second;
            NameSearchResult accResult = first.isSynonym() ? second : first;
            //the accepted result has to be a species
            if (accResult.getRank() != RankType.SPECIES)
                return;
            throwIfAcceptedIsChildOf(accResult, searchForRecordByLsid(synResult.getAcceptedLsid()));
        } else if (results.size() > 2) {
            //check to see if all the other results are synonyms of the same concept AND that concept is a child of the accepted concept
            NameSearchResult accResult = null;
            String acceptedLsid = null;
            for (NameSearchResult nsr : results) {
                if (!nsr.isSynonym()) {
                    //more than one accepted result means this is not the species split situation
                    if (accResult == null)
                        accResult = nsr;
                    else
                        return;
                } else {
                    //all the synonyms must point to the same accepted concept
                    if (acceptedLsid != null) {
                        if (!acceptedLsid.equals(nsr.getAcceptedLsid()))
                            return;
                    } else {
                        acceptedLsid = nsr.getAcceptedLsid();
                    }
                }
            }
            //now check to see if the accepted concept is a child of the accResult
            if (accResult != null && acceptedLsid != null) {
                throwIfAcceptedIsChildOf(accResult, searchForRecordByLsid(acceptedLsid));
            }
        }
    }

    /**
     * Raises a ParentSynonymChildException when the left value of <code>accSynResult</code> lies
     * strictly between the left and right values of <code>accResult</code>.
     *
     * @param accResult    the accepted result that was matched directly
     * @param accSynResult the record that the synonym(s) point to; may be null when the lookup found nothing
     * @throws ParentSynonymChildException when accSynResult is positioned inside accResult
     */
    private void throwIfAcceptedIsChildOf(NameSearchResult accResult, NameSearchResult accSynResult)
            throws ParentSynonymChildException {
        //a failed lookup or missing left/right values means the relationship can not be tested
        if (accSynResult == null || accSynResult.getLeft() == null
                || accResult.getLeft() == null || accResult.getRight() == null) {
            return;
        }
        int asyLeft = Integer.parseInt(accSynResult.getLeft());
        if (asyLeft > Integer.parseInt(accResult.getLeft())
                && asyLeft < Integer.parseInt(accResult.getRight())) {
            throw new ParentSynonymChildException(accResult, accSynResult);
        }
    }

    /**
     * Tests whether the scientific name of the first result is a known cross rank homonym.
     * <p/>
     * The lower-cased scientific name of the first result is looked up in
     * <code>crossRankHomonyms</code>. A null or empty result list is ignored.
     * <p/>
     * This method should only be called if a rank has not been supplied
     *
     * @param results the search results whose first entry is tested
     * @throws HomonymException When the first result's scientific name is a cross rank homonym
     */
    private void checkForCrossRankHomonym(List<NameSearchResult> results) throws HomonymException {
        if (results == null || results.size() <= 0) {
            return;
        }
        String firstName = results.get(0).getRankClassification().getScientificName().toLowerCase();
        if (crossRankHomonyms.contains(firstName)) {
            throw new HomonymException(
                    "Cross rank homonym detected.  Please repeat search with a rank specified.", results);
        }
    }

    /**
     * Validates a potential homonym by comparing the supplied author with the author of the first result.
     * <p/>
     * The supplied author is normalised with <code>prepareAuthor</code> and compared to the first result's
     * authorship using a Smith-Waterman-Gotoh similarity. When the first result has no author, or the
     * similarity is below 0.8, the normal homonym validation ({@link #validateHomonyms}) is performed.
     * <p/>
     * NOTE(review): the value returned by validateHomonyms is discarded, so the first result is always
     * returned when no exception is raised - confirm that this is intended.
     *
     * @param result the search results, the first of which is expected to be the most complete match
     * @param name   the scientific name that was searched for
     * @param cl     the supplied classification; its authorship is used for the comparison
     * @return the first item of <code>result</code>
     * @throws HomonymException when the homonym validation is performed and can not resolve the homonym
     */
    public NameSearchResult validateHomonymByAuthor(List<NameSearchResult> result, String name,
            LinnaeanRankClassification cl) throws HomonymException {
        //because the author is included in the search the first result should be the most complete
        String suppliedAuthor = prepareAuthor(cl.getAuthorship());
        String resultAuthor = result.get(0).getRankClassification().getAuthorship();
        uk.ac.shef.wit.simmetrics.similaritymetrics.SmithWatermanGotoh metric =
                new uk.ac.shef.wit.simmetrics.similaritymetrics.SmithWatermanGotoh();

        boolean needsHomonymCheck;
        if (resultAuthor == null) {
            needsHomonymCheck = true;
        } else {
            needsHomonymCheck = metric.getSimilarity(suppliedAuthor, resultAuthor) < 0.8;
        }

        if (needsHomonymCheck) {
            //test based on the irmng list of homonyms
            validateHomonyms(result, name, cl);
        }
        return result.get(0);
    }

    /**
     * Normalises an author string for comparison by deleting every punctuation
     * character (<code>\p{P}</code>) and then every separator character (<code>\p{Z}</code>).
     *
     * @param author the author string to normalise; must not be null
     * @return the author with all punctuation and separator characters removed
     */
    private String prepareAuthor(String author) {
        String withoutPunctuation = author.replaceAll("\\p{P}", "");
        String withoutSeparators = withoutPunctuation.replaceAll("\\p{Z}", "");
        return withoutSeparators;
    }

    /**
     * Takes a result set that contains a homonym and then either throws a HomonymException
     * or returns the first result that matches the supplied taxa.
     * <p/>
     * The IRMNG index is consulted (via <code>resolveIRMNGHomonym</code>) to find the rank at which
     * the homonym can be resolved. The first non-synonym result whose classification is identical
     * to the supplied classification at that rank is returned.
     * <p/>
     * AS OF 22/07/2010:
     * Homonyms are ONLY being tested if the result was a genus. According to Tony it is
     * very rare for a species to be a homonym with another species that belongs to a homonym
     * of the same genus.  Eventually we should get a list of the known cases to
     * test against.
     * <p/>
     * 2011-01-14:
     * The homonym validation has been modified to include species level homonyms.
     * The indexing of the irmng species is different to the genus. IRMNG has a
     * more complete genus coverage than species. Thus only the species that are
     * homonyms are included in the index.
     *
     * @param results The results on which to validate the homonyms
     * @param name    The scientific name for the search
     * @param cl      The higher taxa that form the classification for the search item. It is replaced
     *                by a classification built from the first result's scientific name when that
     *                result is a synonym without a rank
     * @return the first result when no homonym needs resolving, otherwise the first non-synonym
     *         result that matches the classification at the resolving rank
     * @throws HomonymException when the homonym can not be resolved; the exception carries <code>results</code>
     */
    public NameSearchResult validateHomonyms(List<NameSearchResult> results, String name,
            LinnaeanRankClassification cl) throws HomonymException {
        //the rank of the first result tells us which type of homonym we are evaluating
        NameSearchResult topResult = results.get(0);
        RankType rank = topResult.getRank();

        try {
            //a synonym without a rank: derive the rank and classification from its scientific name
            if (rank == null && topResult.isSynonym()) {
                cl = new LinnaeanRankClassification(null, null);
                String synName = topResult.getRankClassification().getScientificName();
                try {
                    ParsedName<?> pn = parser.parse(synName);
                    if (pn.isBinomial()) {
                        cl.setSpecies(pn.canonicalName());
                        rank = RankType.SPECIES;
                    } else {
                        cl.setGenus(pn.genusOrAbove);
                        rank = RankType.GENUS;
                    }
                } catch (Exception ignored) {
                    //an unparsable name leaves the rank unset and the classification empty
                }
            }

            boolean genusRank = rank == RankType.GENUS;
            boolean speciesRank = rank == RankType.SPECIES;

            //no classification supplied: start one from the name being searched for
            if (cl == null && genusRank) {
                cl = new LinnaeanRankClassification(null, name);
            } else if (cl == null && speciesRank) {
                cl = new LinnaeanRankClassification(null, null);
                cl.setSpecies(name);
            }

            //make sure that the taxon at the homonym's rank is populated
            if (genusRank) {
                if (cl.getGenus() == null)
                    cl.setGenus(name);
            } else if (speciesRank && cl.getSpecies() == null) {
                cl.setSpecies(name);
            }

            //Find out which rank the homonym can be resolved at.
            //This will indicate which ranks of the supplied classification need to match the result's classification in order to resolve the homonym
            RankType resolveLevel = resolveIRMNGHomonym(cl, rank);
            if (resolveLevel == null) {
                //there was no need to resolve the homonym
                return topResult;
            }
            //result must match at the kingdom level and resolveLevel of the taxonomy (TODO)
            log.debug("resolve the homonym at " + resolveLevel + " rank");

            //the first result should be the one that most closely resembles the required classification
            for (NameSearchResult candidate : results) {
                //If the result is a synonym it is difficult to resolve the homonym, because synonyms
                //do not have the corresponding classifications. They are skipped for now.
                //There are 2 situations in which we *may* be able to resolve the homonym:
                // 1) The IRMNG entry that resolves the homonym includes an "accepted" concept
                // 2) The resolveLevel is Kingdom and we make an assumption that the concept has not changed kingdoms
                //    -- This is not always true especially with plants/algae/fungi and animalia/protozoa
                //TODO algorithm to handle these situations see above comment
                if (candidate.isSynonym()) {
                    continue;
                }
                if (cl.hasIdenticalClassification(candidate.getRankClassification(), resolveLevel)) {
                    return candidate;
                }
            }

            throw new HomonymException(results);
        } catch (HomonymException e) {
            //every homonym exception leaving this method carries the full result list
            e.setResults(results);
            throw e;
        }
    }

    /**
     * Uses the IRMNG index to determine whether or not a homonym can be resolved
     * with the supplied details.
     * <p/>
     * The homonym is considered resolvable when the genus level IRMNG search for the
     * classification produces at most one hit.
     *
     * @param cl the classification to search the IRMNG index with
     * @return true when the search produced 0 or 1 hits; false when it produced more,
     *         or when no search result was available
     */
    private boolean isHomonymResolvable(LinnaeanRankClassification cl) {
        TopDocs genusHits = getIRMNGGenus(cl, RankType.GENUS);
        return genusHits != null && genusHits.totalHits <= 1;
    }

    /**
     * Searches the IRMNG index for entries of the supplied rank that match the classification.
     * <p/>
     * More than one hit indicates that an unresolved homonym exists for the supplied
     * search details. At most 10 documents are requested from the index.
     *
     * @param cl   The classification to test; a search is only performed when it has a genus or a species
     * @param rank The rank level of the homonym being tested either RankType.GENUS or RankType.SPECIES
     * @return the hits from the IRMNG index, or null when the classification has neither a genus
     *         nor a species, or when the search failed (the failure is logged as a warning)
     */
    public TopDocs getIRMNGGenus(LinnaeanRankClassification cl, RankType rank) {
        boolean searchable = cl != null && (cl.getGenus() != null || cl.getSpecies() != null);
        if (!searchable) {
            return null;
        }
        try {
            //the rank is a required clause, the remainder of the query comes from the classification
            String rankClause = "+rank:" + rank;
            String classificationClause = cl.getLuceneSearchString(false).trim();
            String searchString = rankClause + " " + classificationClause;

            log.debug("Search string : " + searchString + " classification : " + cl);
            Query query = queryParser.get().parse(searchString);
            log.debug("getIRMNG query: " + query.toString());
            return irmngSearcher.search(query, 10);
        } catch (Exception e) {
            log.warn("Error searching IRMNG index.", e);
            return null;
        }
    }

    /**
     * Attempt to resolve the homonym using the IRMNG index.
     * <p/>
     * The ability to resolve the homonym is dependent on the quality and quantity
     * of the higher taxa provided in the search via cl.
     * <p/>
     * The IRMNG index is first searched with only the genus (and the species for a species level
     * homonym). When that produces more than one hit the kingdom, phylum, class, order and family of
     * the supplied classification are added in turn until a search produces exactly one hit.
     * <p/>
     * A search that fails (<code>getIRMNGGenus</code> returns null) is treated as producing no hits,
     * so a failure ends in a HomonymException rather than a NullPointerException.
     *
     * @param cl   The classification used to determine the rank at which the homonym is resolvable
     * @param rank The rank of the homonym being tested; the species is only used for RankType.SPECIES
     * @return null when the initial search produces at most one hit (nothing to resolve), otherwise
     *         the rank of the taxon whose addition narrowed the search to exactly one hit
     * @throws HomonymException when cl is null, has neither a genus nor a species, or the supplied
     *                          higher taxa are unable to narrow the search to a single hit
     */
    public RankType resolveIRMNGHomonym(LinnaeanRankClassification cl, RankType rank) throws HomonymException {
        //check to see if we need to resolve the homonym
        if (cl != null && (cl.getGenus() != null || cl.getSpecies() != null)) {
            //create a local classification to work with we will only add a taxon when we are ready to try and resolve with it
            LinnaeanRankClassification newcl = new LinnaeanRankClassification(null, cl.getGenus());
            if (rank == RankType.SPECIES)
                newcl.setSpecies(cl.getSpecies());

            long hits = countIRMNGHits(newcl, rank);
            if (hits <= 1)
                return null;

            //Step 1 search for kingdom and genus
            if (cl.getKingdom() != null) {
                newcl.setKingdom(cl.getKingdom());
                hits = countIRMNGHits(newcl, rank);
                if (hits == 1)
                    return RankType.KINGDOM;
            }
            //Step 2 add the phylum
            if (cl.getPhylum() != null && hits > 1) {
                newcl.setPhylum(cl.getPhylum());
                hits = countIRMNGHits(newcl, rank);
                if (hits == 1)
                    return RankType.PHYLUM;
                //This may not be a good idea
                else if (hits == 0)
                    newcl.setPhylum(null);//just in case the phylum was specified incorrectly
            }
            //Step 3 try the class - attempted regardless of the previous number of hits
            if (cl.getKlass() != null) {
                newcl.setKlass(cl.getKlass());
                hits = countIRMNGHits(newcl, rank);
                if (hits == 1)
                    return RankType.CLASS;
            }
            //Step 4 try the order
            if (cl.getOrder() != null && hits > 1) {
                newcl.setOrder(cl.getOrder());
                hits = countIRMNGHits(newcl, rank);
                if (hits == 1)
                    return RankType.ORDER;
            }
            //Step 5 try the family
            if (cl.getFamily() != null && hits > 1) {
                newcl.setFamily(cl.getFamily());
                hits = countIRMNGHits(newcl, rank);
                if (hits == 1)
                    return RankType.FAMILY;
            }
        }
        throw new HomonymException("Problem resolving the classification: " + cl);
    }

    /**
     * Counts the IRMNG hits for the classification at the supplied rank.
     *
     * @param cl   the classification to search with
     * @param rank the rank of the homonym being tested
     * @return the total number of hits, or 0 when no search result was available
     */
    private long countIRMNGHits(LinnaeanRankClassification cl, RankType rank) {
        TopDocs docs = getIRMNGGenus(cl, rank);
        return docs == null ? 0 : docs.totalHits;
    }

    /**
     * Looks up the kingdom recorded in the IRMNG index for the genus of the supplied name.
     * <p/>
     * The name is parsed to obtain its genus (or higher taxon), the IRMNG index is searched for
     * that genus and the kingdom field of the first hit is returned.
     *
     * @param name the scientific name whose genus is to be looked up
     * @return the kingdom field of the first IRMNG hit, or null when the name can not be parsed,
     *         the search is not possible or fails, there are no hits, or the document can not be read
     */
    private String getValueForSynonym(String name) {
        try {
            //get the genus for the name
            ParsedName<?> pn = parser.parse(name);
            if (pn != null) {
                String genus = pn.getGenusOrAbove();
                LinnaeanRankClassification cl = new LinnaeanRankClassification(null, genus);
                //search for the genus in irmng; null is returned when there is no genus or the search failed
                TopDocs docs = getIRMNGGenus(cl, RankType.GENUS);
                try {
                    if (docs != null && docs.totalHits > 0)
                        return irmngSearcher.doc(docs.scoreDocs[0].doc).get(RankType.KINGDOM.getRank());
                } catch (IOException e) {
                    log.warn("Unable to get value for synonym. ", e);
                }
            }
        } catch (org.gbif.ecat.parser.UnparsableException e) {
            //an unparsable name has no genus to look up, so there is no value to return
            log.debug("Unable to parse the name to get value for synonym: " + name, e);
        }
        return null;
    }

    /**
     * Performs a search on the common name index for the supplied name.
     * <p/>
     * This is the public entry point to <code>getLSIDForUniqueCommonName</code>.
     *
     * @param commonName the common (vernacular) name to search for
     * @return the LSID found for the common name, or null when no LSID could be determined
     */
    public String searchForLSIDCommonName(String commonName) {
        final String lsid = getLSIDForUniqueCommonName(commonName);
        return lsid;
    }

    /**
     * Returns the LSID for the CB name usage for the supplied common name.
     * <p/>
     * The name is upper-cased and stripped of the characters that the search pattern does not
     * allow before the common name index is searched (at most 10 hits are examined).
     * <p/>
     * When the common name returns more than 1 hit a result is only returned if all the scientific names match
     *
     * @param name the common name to search for, may be null
     * @return the primary LSID for the first hit; null when the name is null, there are no hits,
     *         the hits have different scientific names or the index can not be read
     */
    private String getLSIDForUniqueCommonName(String name) {
        if (name == null) {
            return null;
        }
        //NOTE(review): the "??" in the pattern looks like characters lost to an encoding problem - confirm against the indexer
        String indexedForm = name.toUpperCase().replaceAll("[^A-Z0-9??]", "");
        TermQuery query = new TermQuery(
                new Term(ALANameIndexer.IndexField.COMMON_NAME.toString(), indexedForm));
        try {
            TopDocs results = vernSearcher.search(query, 10);
            log.debug("Number of matches for " + name + " " + results.totalHits);

            //if all the results have the same scientific name return the LSID for the first
            String firstLsid = null;
            String firstName = null;
            ScoreDoc[] hits = results.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                org.apache.lucene.document.Document doc = vernSearcher.doc(hits[i].doc);
                String sciName = doc.get(ALANameIndexer.IndexField.NAME.toString());
                if (firstLsid == null) {
                    firstLsid = doc.get(ALANameIndexer.IndexField.LSID.toString());
                    firstName = sciName;
                } else if (!doSciNamesMatch(firstName, sciName)) {
                    //the common name refers to more than one scientific name so it is not unique
                    return null;
                }
            }
            //want to get the primary lsid for the taxon name thus we get the current lsid in the index...
            return getPrimaryLsid(firstLsid);
        } catch (IOException e) {
            log.debug("Unable to access document for common name.", e);
        }
        return null;
    }

    /**
     * Returns true when the parsed names match.
     * <p/>
     * Both names are parsed and their canonical names are compared for equality.
     *
     * @param n1 the first scientific name
     * @param n2 the second scientific name
     * @return true when both names parse and have equal canonical names; false when either
     *         name is unparsable or parses to null
     */
    private boolean doSciNamesMatch(String n1, String n2) {
        try {
            ParsedName<?> firstParsed = parser.parse(n1);
            ParsedName<?> secondParsed = parser.parse(n2);
            if (firstParsed == null || secondParsed == null) {
                return false;
            }
            String firstCanonical = firstParsed.canonicalName();
            return firstCanonical.equals(secondParsed.canonicalName());
        } catch (org.gbif.ecat.parser.UnparsableException e) {
            //names that can not be parsed are never considered a match
            return false;
        }
    }

    /**
     * Performs a search on the supplied common name returning a NameSearchResult.
     * Useful if you require CB ID's etc.
     * <p/>
     * The LSID for the common name is located first and the record for that LSID is
     * then retrieved. A located record has its match type set to MatchType.VERNACULAR.
     *
     * @param name the common name to search for
     * @return the record for the common name, or null when no LSID or no record could be found
     */
    public NameSearchResult searchForCommonName(String name) {
        String lsid = getLSIDForUniqueCommonName(name);
        if (lsid == null) {
            return null;
        }
        //we need to get the CB ID for the supplied LSID
        NameSearchResult match = searchForRecordByLsid(lsid);
        if (match != null) {
            match.setMatchType(MatchType.VERNACULAR);
        }
        return match;
    }

    /**
     * Returns the primary LSID for the supplied lsid.
     * <p/>
     * This is useful in the situation where multiple LSIDs are associated with
     * a scientific name and there is a reference to the non-primary LSID.
     * <p/>
     * The identifier index is searched for a document whose "lsid" field matches the supplied
     * value and the "reallsid" field of the first hit is returned.
     *
     * @param lsid the LSID to look up, may be null
     * @return the "reallsid" value of the matching document; the supplied lsid when it is null,
     *         there is no matching document or the index can not be read
     */
    public String getPrimaryLsid(String lsid) {
        if (lsid != null) {
            TermQuery tq = new TermQuery(new Term("lsid", lsid));
            try {
                TopDocs results = idSearcher.search(tq, 1);
                if (results.totalHits > 0)
                    return idSearcher.doc(results.scoreDocs[0].doc).get("reallsid");
            } catch (IOException e) {
                //best effort lookup: report the failure and fall back to the supplied LSID
                log.warn("Unable to look up the primary LSID for " + lsid, e);
            }
        }

        return lsid;
    }

    /**
     * Retrieves the record that has the supplied LSID.
     * <p/>
     * A direct search is performed on the LSID field for a single result. A located record
     * has its match type set to MatchType.TAXON_ID.
     *
     * @param lsid the LSID of the record to retrieve
     * @return the matching record, or null when there is no match or the search failed
     *         (the failure is logged as an error)
     */
    public NameSearchResult searchForRecordByLsid(String lsid) {
        NameSearchResult result = null;
        try {
            List<NameSearchResult> results = performSearch(ALANameIndexer.IndexField.LSID.toString(), lsid, null,
                    null, 1, MatchType.DIRECT, false, idParser.get());
            if (results != null && results.size() > 0)
                result = results.get(0);
        } catch (Exception e) {
            //we are not checking for homonyms so this should never happen
            log.error("Unable to search for record by LSID " + lsid, e);
        }
        if (result != null)
            result.setMatchType(MatchType.TAXON_ID);
        return result;
    }
}