org.zanata.service.impl.TranslationMemoryServiceImpl.java Source code

Introduction

Here is the source code for org.zanata.service.impl.TranslationMemoryServiceImpl.java
Source

/*
 * Copyright 2014, Red Hat, Inc. and individual contributors as indicated by the
 * @author tags. See the copyright.txt file in the distribution for a full
 * listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this software; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF
 * site: http://www.fsf.org.
 */
package org.zanata.service.impl;

import static com.google.common.collect.Collections2.filter;
import static org.zanata.webtrans.shared.rest.dto.InternalTMSource.InternalTMChoice.SelectNone;
import static org.zanata.webtrans.shared.rest.dto.InternalTMSource.InternalTMChoice.SelectSome;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import javax.annotation.Nonnull;
import javax.enterprise.context.RequestScoped;
import javax.inject.Inject;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.hibernate.search.jpa.FullTextEntityManager;
import org.hibernate.search.jpa.FullTextQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.zanata.common.ContentState;
import org.zanata.common.EntityStatus;
import org.zanata.common.LocaleId;
import org.zanata.hibernate.search.IndexFieldLabels;
import org.zanata.hibernate.search.TextContainerAnalyzerDiscriminator;
import org.zanata.jpa.FullText;
import org.zanata.model.HDocument;
import org.zanata.model.HLocale;
import org.zanata.model.HProject;
import org.zanata.model.HProjectIteration;
import org.zanata.model.HSimpleComment;
import org.zanata.model.HTextFlow;
import org.zanata.model.HTextFlowTarget;
import org.zanata.model.tm.TransMemoryUnit;
import org.zanata.rest.editor.dto.suggestion.Suggestion;
import org.zanata.rest.editor.dto.suggestion.SuggestionDetail;
import org.zanata.rest.editor.dto.suggestion.TextFlowSuggestionDetail;
import org.zanata.rest.editor.dto.suggestion.TransMemoryUnitSuggestionDetail;
import org.zanata.search.LevenshteinTokenUtil;
import org.zanata.search.LevenshteinUtil;
import org.zanata.service.TranslationMemoryService;
import org.zanata.util.SysProperties;
import org.zanata.util.UrlUtil;
import org.zanata.webtrans.shared.model.TransMemoryDetails;
import org.zanata.webtrans.shared.model.TransMemoryQuery;
import org.zanata.webtrans.shared.model.TransMemoryResultItem;
import org.zanata.webtrans.shared.rest.dto.InternalTMSource;
import org.zanata.webtrans.shared.rpc.HasSearchType;
import org.zanata.webtrans.shared.rpc.LuceneQuery;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.base.Predicate;
import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * @author Alex Eng <a href="mailto:aeng@redhat.com">aeng@redhat.com</a>
 */
@RequestScoped
public class TranslationMemoryServiceImpl implements TranslationMemoryService {
    /** Logger shared by this class and its nested classes. */
    private static final Logger log = LoggerFactory.getLogger(TranslationMemoryServiceImpl.class);

    // The tunables below are resolved once, when the class is initialised,
    // through SysProperties. The literal passed as second argument is
    // presumably the fallback used when the property is not set -- confirm
    // against SysProperties.

    /** Result cap handed to the index query by searchTransMemory(). */
    private static final int SEARCH_MAX_RESULTS = SysProperties.getInt(SysProperties.TM_MAX_RESULTS, 20);
    /** Boost set on the source-content query in generateTextFlowTargetQuery(). */
    private static final float BOOST_CONTENT = SysProperties.getFloat(SysProperties.TM_BOOST_CONTENT, 10.0F);
    /** Boost for the "id" term (the text flow target being translated). */
    private static final float BOOST_TFTID = SysProperties.getFloat(SysProperties.TM_BOOST_TFTID, 10.0F);
    /** Boost for the "project" term (same project slug). */
    private static final float BOOST_PROJECT = SysProperties.getFloat(SysProperties.TM_BOOST_PROJECT, 2.0F);
    /** Boost for the "documentId" term (same document id). */
    private static final float BOOST_DOCID = SysProperties.getFloat(SysProperties.TM_BOOST_DOCID, 1.5F);
    /** Boost for the "textFlow.resId" term (same resource id). */
    private static final float BOOST_RESID = SysProperties.getFloat(SysProperties.TM_BOOST_RESID, 1.5F);
    /** Boost for the "iteration" term (same iteration slug). */
    private static final float BOOST_ITERATION = SysProperties.getFloat(SysProperties.TM_BOOST_ITERATION, 1.0F);

    // Disabled until a combined project+iteration slug is added to the index
    // (see the TODO in generateTextFlowTargetQuery()).
    // private static final float BOOST_PROJITERSLUG = SysProperties.getFloat(
    // SysProperties.TM_BOOST_PROJITERSLUG, 1.5f);
    /** Matches whose similarity percentage is below this value are dropped by processIndexMatch(). */
    private static final double MINIMUM_SIMILARITY = 1.0;
    /**
     * Regex used by escape(): optional leading whitespace, one of the words
     * AND / OR / NOT, then at least one whitespace character.
     */
    private static final String LUCENE_KEY_WORDS = "(\\s*)(AND|OR|NOT)(\\s+)";
    private static final long serialVersionUID = -570503476695179297L;

    // Sort by score first, then by the last-changed field (compared as a
    // string, reversed) so that newer HTextFlowTarget entries come first
    // among equally scored hits.
    private transient final Sort lastChangedSort = new Sort(SortField.FIELD_SCORE,
            new SortField(IndexFieldLabels.LAST_CHANGED_FIELD, SortField.Type.STRING, true));
    // The three state queries below are added as MUST_NOT clauses in
    // generateTextFlowTargetQuery(), excluding New, NeedReview and Rejected
    // targets from the results.
    private transient final TermQuery newStateQuery = new TermQuery(
            new Term(IndexFieldLabels.CONTENT_STATE_FIELD, ContentState.New.toString()));
    private transient final TermQuery needReviewStateQuery = new TermQuery(
            new Term(IndexFieldLabels.CONTENT_STATE_FIELD, ContentState.NeedReview.toString()));
    private transient final TermQuery rejectedStateQuery = new TermQuery(
            new Term(IndexFieldLabels.CONTENT_STATE_FIELD, ContentState.Rejected.toString()));

    // Used to look up entities and to create/execute the full-text queries.
    @SuppressFBWarnings(value = "SE_BAD_FIELD")
    private FullTextEntityManager entityManager;
    // Used by getTransMemoryDetail() to build the editor URL of a text flow.
    private UrlUtil urlUtil;

    /**
     * Creates the service with its collaborators supplied by injection.
     *
     * @param entityManager
     *            full-text entity manager (qualified with {@code @FullText})
     *            used to look up entities and run index queries
     * @param urlUtil
     *            helper used to build editor URLs for TM details
     */
    @Inject
    public TranslationMemoryServiceImpl(@FullText FullTextEntityManager entityManager, UrlUtil urlUtil) {
        this.entityManager = entityManager;
        this.urlUtil = urlUtil;
    }

    /**
     * No-argument constructor; leaves {@code entityManager} and
     * {@code urlUtil} unset.
     *
     * NOTE(review): presumably present only so the container can proxy this
     * request-scoped bean -- confirm before removing.
     */
    @SuppressWarnings("unused")
    public TranslationMemoryServiceImpl() {
    }

    /**
     * Collects the display details (comments, project, version, document,
     * resource id, message context, state, last modifier, last change and
     * editor URL) of the translation of {@code tf} in {@code hLocale}.
     *
     * NOTE(review): the target looked up for {@code hLocale} is dereferenced
     * without a null check, so callers are expected to pass a text flow that
     * has a target in that locale.
     *
     * @param hLocale
     *            locale whose translation is described
     * @param tf
     *            source text flow
     * @return the populated details object
     */
    @Override
    public TransMemoryDetails getTransMemoryDetail(HLocale hLocale, HTextFlow tf) {
        final HTextFlowTarget target = tf.getTargets().get(hLocale.getId());
        final HDocument doc = tf.getDocument();
        final HProjectIteration iteration = doc.getProjectIteration();
        final HProject owningProject = iteration.getProject();

        // gettext message context, when the text flow carries pot entry data
        String potContext = null;
        if (tf.getPotEntryData() != null) {
            potContext = tf.getPotEntryData().getContext();
        }

        // the username is only known when the last modifier has an account
        final boolean modifierHasAccount = target.getLastModifiedBy() != null
                && target.getLastModifiedBy().hasAccount();
        final String lastModifier = modifierHasAccount
                ? target.getLastModifiedBy().getAccount().getUsername()
                : null;

        final String editorUrl = urlUtil.editorTransUnitUrl(owningProject.getSlug(), iteration.getSlug(),
                hLocale.getLocaleId(), doc.getSourceLocaleId(), doc.getDocId(), tf.getId());
        final String sourceComment = HSimpleComment.toString(tf.getComment());
        final String targetComment = HSimpleComment.toString(target.getComment());

        return new TransMemoryDetails(sourceComment, targetComment, owningProject.getName(),
                iteration.getSlug(), doc.getDocId(), tf.getResId(), potContext, target.getState(),
                lastModifier, target.getLastChanged(), editorUrl);
    }

    /**
     * Content-hash lookup: returns the first index hit for the text flow's
     * content hash. Hits are ordered by score and then by
     * HTextFlowTarget.lastChanged, newest first; the text flow's own
     * translation is excluded.
     *
     * TODO this is only used by test. Should we remove it? (The original
     * note also says it is used by CopyTrans.)
     *
     * @param textFlow
     *            text flow whose content hash is searched for
     * @param targetLocaleId
     *            locale of the wanted translation
     * @param sourceLocaleId
     *            locale of the source text
     * @param checkContext
     *            flag for the resource id condition of the query
     * @param checkDocument
     *            flag for the document id condition of the query
     * @param checkProject
     *            flag for the project condition of the query
     * @return the best matching target, or empty when nothing matched
     */
    @Override
    public Optional<HTextFlowTarget> searchBestMatchTransMemory(final HTextFlow textFlow, LocaleId targetLocaleId,
            LocaleId sourceLocaleId, boolean checkContext, boolean checkDocument, boolean checkProject) {
        final TransMemoryQuery hashQuery = buildTMQuery(textFlow, HasSearchType.SearchType.CONTENT_HASH,
                checkContext, checkDocument, checkProject, false, InternalTMSource.SELECT_ALL);
        // a max result count of 0 leaves the query unlimited
        final Collection<Object[]> hits = findMatchingTranslation(targetLocaleId, sourceLocaleId, hashQuery, 0,
                Optional.<Long>empty(), HTextFlowTarget.class);
        // each hit is {score, entity}; only the entity of the first hit is needed
        return hits.isEmpty()
                ? Optional.<HTextFlowTarget>empty()
                : Optional.of((HTextFlowTarget) hits.iterator().next()[1]);
    }

    /**
     * Fuzzy lookup used by TMMerge: runs a FUZZY_PLURAL search that includes
     * the text flow's own translation and returns the first result accepted
     * by the threshold predicate. Results are already ordered by
     * searchTransMemory().
     *
     * @param textFlow
     *            text flow whose contents are searched for
     * @param targetLocaleId
     *            locale of the wanted translation
     * @param sourceLocaleId
     *            locale of the source text
     * @param checkContext
     *            flag for the resource id condition of the query
     * @param checkDocument
     *            flag for the document id condition of the query
     * @param checkProject
     *            flag for the project condition of the query
     * @param thresholdPercent
     *            similarity threshold handed to the filter predicate
     * @param internalTMSource
     *            which internal project versions may be used as TM source
     * @return the best accepted match, or empty when none qualifies
     */
    @Override
    public Optional<TransMemoryResultItem> searchBestMatchTransMemory(HTextFlow textFlow, LocaleId targetLocaleId,
            LocaleId sourceLocaleId, boolean checkContext, boolean checkDocument, boolean checkProject,
            int thresholdPercent, InternalTMSource internalTMSource) {
        final TransMemoryQuery fuzzyQuery = buildTMQuery(textFlow, HasSearchType.SearchType.FUZZY_PLURAL,
                checkContext, checkDocument, checkProject, true, internalTMSource);
        final List<TransMemoryResultItem> ranked = searchTransMemory(targetLocaleId, sourceLocaleId, fuzzyQuery);
        // lazily filtered view; the ranking order of the list is kept
        final Collection<TransMemoryResultItem> qualifying = Collections2.filter(ranked,
                new TransMemoryAboveThresholdPredicate(thresholdPercent));
        return qualifying.isEmpty()
                ? Optional.<TransMemoryResultItem>empty()
                : Optional.of(qualifying.iterator().next());
    }

    /**
     * Searches both the HTextFlowTarget and TransMemoryUnit indexes, merges
     * hits that share the same source and target contents into one result
     * item, and returns the items sorted by TransMemoryResultComparator.
     *
     * @param targetLocaleId
     *            locale of the wanted translations
     * @param sourceLocaleId
     *            locale of the source text
     * @param transMemoryQuery
     *            query text, search type and filter conditions
     * @return sorted result items; empty when nothing matched
     */
    @Override
    public List<TransMemoryResultItem> searchTransMemory(LocaleId targetLocaleId, LocaleId sourceLocaleId,
            TransMemoryQuery transMemoryQuery) {
        // NB: If we want to, we could pass the TFT id from the editor
        // via GWT-RPC(TransMemoryQuery), allowing Lucene to rank results
        // by metadata too. For now no text flow target id is supplied.
        final Collection<Object[]> indexHits = findMatchingTranslation(targetLocaleId, sourceLocaleId,
                transMemoryQuery, SEARCH_MAX_RESULTS, Optional.<Long>empty(), HTextFlowTarget.class,
                TransMemoryUnit.class);

        // insertion-ordered so that merged items keep the index ranking until sorted
        final Map<TMKey, TransMemoryResultItem> merged = new LinkedHashMap<>(indexHits.size());
        for (Object[] hit : indexHits) {
            processIndexMatch(transMemoryQuery, merged, hit, sourceLocaleId, targetLocaleId);
        }

        final List<TransMemoryResultItem> ordered = new ArrayList<>(merged.values());
        ordered.sort(new TransMemoryResultComparator(transMemoryQuery.getInternalTMSource()));
        return ordered;
    }

    /**
     * Runs a TM search whose results carry per-match details, by delegating
     * to a QueryMatchProcessor built from the given arguments.
     *
     * @param targetLocaleId
     *            locale of the wanted translations
     * @param sourceLocaleId
     *            locale of the source text
     * @param transMemoryQuery
     *            query text, search type and filter conditions
     * @param textFlowTargetId
     *            optional id of the text flow target being translated
     * @return the suggestions produced by the processor
     */
    @Override
    public List<Suggestion> searchTransMemoryWithDetails(LocaleId targetLocaleId, LocaleId sourceLocaleId,
            TransMemoryQuery transMemoryQuery, Optional<Long> textFlowTargetId) {
        final QueryMatchProcessor processor = new QueryMatchProcessor(transMemoryQuery, sourceLocaleId,
                targetLocaleId, textFlowTargetId);
        return processor.process();
    }

    /**
     * Builds the TM query for a text flow.
     *
     * A CONTENT_HASH search queries the text flow's content hash; every other
     * search type queries its contents and carries the internal TM source.
     * The project, document and resource id conditions take their values
     * from the text flow and their flags from the check* arguments.
     *
     * @param textFlow
     *            text flow to search matches for
     * @param searchType
     *            kind of search to perform
     * @param checkContext
     *            flag of the resource id condition
     * @param checkDocument
     *            flag of the document id condition
     * @param checkProject
     *            flag of the project id condition
     * @param includeOwnTranslation
     *            when false, the text flow's own translation is excluded
     * @param internalTMSource
     *            internal TM source; ignored for CONTENT_HASH searches
     * @return the assembled query
     */
    private TransMemoryQuery buildTMQuery(HTextFlow textFlow, HasSearchType.SearchType searchType,
            boolean checkContext, boolean checkDocument, boolean checkProject, boolean includeOwnTranslation,
            InternalTMSource internalTMSource) {
        final HDocument owningDocument = textFlow.getDocument();
        final String projectId = owningDocument.getProjectIteration().getProject().getId().toString();

        final TransMemoryQuery.Condition projectCondition = new TransMemoryQuery.Condition(checkProject,
                projectId);
        final TransMemoryQuery.Condition documentCondition = new TransMemoryQuery.Condition(checkDocument,
                owningDocument.getDocId());
        final TransMemoryQuery.Condition resIdCondition = new TransMemoryQuery.Condition(checkContext,
                textFlow.getResId());

        final boolean byContentHash = searchType.equals(HasSearchType.SearchType.CONTENT_HASH);
        final TransMemoryQuery tmQuery = byContentHash
                ? new TransMemoryQuery(textFlow.getContentHash(), searchType, projectCondition,
                        documentCondition, resIdCondition)
                : new TransMemoryQuery(textFlow.getContents(), searchType, projectCondition,
                        documentCondition, resIdCondition, internalTMSource);

        if (includeOwnTranslation) {
            return tmQuery;
        }
        // record the text flow id so that its own translation can be excluded
        tmQuery.setIncludeOwnTranslation(false, textFlow.getId().toString());
        return tmQuery;
    }

    /**
     * Runs the index search and returns the hits that pass
     * ValidTargetFilterPredicate. Each hit is an array where
     * match[0] = (float) score and match[1] = entity (HTextFlowTarget or
     * TransMemoryUnit).
     *
     * This method is best-effort: parse failures and runtime exceptions
     * (including the one raised here for missing entity types) are logged
     * and an empty list is returned instead.
     *
     * @param targetLocaleId
     *            locale of the wanted translations
     * @param sourceLocaleId
     *            locale of the source text
     * @param transMemoryQuery
     *            query text, search type and filter conditions
     * @param maxResults
     *            result cap; values that are not positive mean "no cap"
     * @param textFlowTargetId
     *            optional id of the text flow target being translated
     * @param entityTypes
     *            indexed entity classes to search; at least one is required
     * @return filtered view of the hits, or an empty list on failure
     */
    private Collection<Object[]> findMatchingTranslation(LocaleId targetLocaleId, LocaleId sourceLocaleId,
            TransMemoryQuery transMemoryQuery, int maxResults, Optional<Long> textFlowTargetId,
            @Nonnull Class<?>... entityTypes) {
        try {
            final boolean noEntityTypes = entityTypes == null || entityTypes.length == 0;
            if (noEntityTypes) {
                // deliberately thrown inside the try: it is logged below like
                // any other runtime failure
                throw new RuntimeException(
                        "Need entity type (HTextFlowTarget.class or TransMemoryUnit.class) for TM search");
            }
            final List<Object[]> rawHits = getSearchResult(transMemoryQuery, sourceLocaleId, targetLocaleId,
                    maxResults, textFlowTargetId, entityTypes);
            // filter out invalid target
            // TODO filter by entityTypes as well
            // TODO returning a filtered collection might be overkill
            return filter(rawHits, new ValidTargetFilterPredicate(targetLocaleId));
        } catch (ParseException parseFailure) {
            if (parseFailure.getCause() instanceof BooleanQuery.TooManyClauses) {
                log.warn("BooleanQuery.TooManyClauses, query too long to parse \'"
                        + StringUtils.left(transMemoryQuery.getQueries().get(0), 80) + "...\'");
            } else if (transMemoryQuery.getSearchType() == HasSearchType.SearchType.RAW) {
                // raw queries are user-written, so a syntax error is expected
                // TODO tell the user
                log.info("Can\'t parse raw query {}", transMemoryQuery);
            } else {
                // escaping failed!
                log.error("Can\'t parse query " + transMemoryQuery, parseFailure);
            }
        } catch (RuntimeException unexpected) {
            log.error("Runtime exception:", unexpected);
        }
        return new ArrayList<>();
    }

    /**
     * Folds one index hit into {@code matchesMap}.
     *
     * For an HTextFlowTarget hit the source is the text flow's contents and
     * the match type is derived from the target's state; for a
     * TransMemoryUnit hit the source and target are the plain text segments
     * of the variants for the two locales and the match type is Imported.
     * Hits whose similarity is below MINIMUM_SIMILARITY are ignored, as are
     * entities of any other type.
     *
     * @param transMemoryQuery
     *            query the hit is compared against
     * @param matchesMap
     *            result items collected so far, keyed by source and target contents
     * @param match
     *            index hit: {score, entity}
     * @param sourceLocaleId
     *            locale used to pick the source variant of a TM unit
     * @param targetLocaleId
     *            locale used to pick the target variant of a TM unit
     */
    private void processIndexMatch(TransMemoryQuery transMemoryQuery, Map<TMKey, TransMemoryResultItem> matchesMap,
            Object[] match, LocaleId sourceLocaleId, LocaleId targetLocaleId) {
        final Object hitEntity = match[1];

        if (hitEntity instanceof HTextFlowTarget) {
            final HTextFlowTarget target = (HTextFlowTarget) hitEntity;
            final ArrayList<String> sourceTexts = Lists.newArrayList(target.getTextFlow().getContents());
            final ArrayList<String> targetTexts = Lists.newArrayList(target.getContents());
            // fails for states other than Approved/Translated
            final TransMemoryResultItem.MatchType internalType = fromContentState(target.getState());
            final double similarity = calculateSimilarityPercentage(transMemoryQuery, sourceTexts);
            if (similarity < MINIMUM_SIMILARITY) {
                log.debug("Ignoring TM - {} with less than {}% matching.", sourceTexts, MINIMUM_SIMILARITY);
                return;
            }
            final Long versionId = target.getTextFlow().getDocument().getProjectIteration().getId();
            final TransMemoryResultItem resultItem = createOrGetResultItem(matchesMap, match, internalType,
                    sourceTexts, targetTexts, similarity, versionId);
            addTextFlowTargetToResultMatches(target, resultItem);
            return;
        }

        if (!(hitEntity instanceof TransMemoryUnit)) {
            return;
        }

        final TransMemoryUnit tmUnit = (TransMemoryUnit) hitEntity;
        final ArrayList<String> sourceTexts = Lists.newArrayList(
                tmUnit.getTransUnitVariants().get(sourceLocaleId.getId()).getPlainTextSegment());
        final ArrayList<String> targetTexts = Lists.newArrayList(
                tmUnit.getTransUnitVariants().get(targetLocaleId.getId()).getPlainTextSegment());
        final double similarity = calculateSimilarityPercentage(transMemoryQuery, sourceTexts);
        if (similarity < MINIMUM_SIMILARITY) {
            log.debug("Ignoring TM - {} with less than {}% matching.", sourceTexts, MINIMUM_SIMILARITY);
            return;
        }
        // imported units do not belong to a project version, hence the null id
        final TransMemoryResultItem resultItem = createOrGetResultItem(matchesMap, match,
                TransMemoryResultItem.MatchType.Imported, sourceTexts, targetTexts, similarity, null);
        addTransMemoryUnitToResultMatches(resultItem, tmUnit);
    }

    /**
     * Computes how similar the query is to the given source contents, as a
     * percentage.
     *
     * CONTENT_HASH searches always yield 100. FUZZY_PLURAL compares all
     * query strings with the source contents; every other type compares only
     * the first query string. The token-based similarity is used first and,
     * when it exceeds 99.99, replaced by the character-based one so that
     * 100% is only reported when every character matches.
     *
     * @param query
     *            query providing the search type and the query strings
     * @param sourceContents
     *            source contents of the candidate match
     * @return similarity percentage
     */
    private static double calculateSimilarityPercentage(TransMemoryQuery query, List<String> sourceContents) {
        final HasSearchType.SearchType searchType = query.getSearchType();
        if (searchType == HasSearchType.SearchType.CONTENT_HASH) {
            return 100;
        }

        if (searchType == HasSearchType.SearchType.FUZZY_PLURAL) {
            final double tokenPercent = 100
                    * LevenshteinTokenUtil.getSimilarity(query.getQueries(), sourceContents);
            if (tokenPercent > 99.99) {
                // confirm the near-perfect token match character by character
                return 100 * LevenshteinUtil.getSimilarity(query.getQueries(), sourceContents);
            }
            return tokenPercent;
        }

        final String firstQuery = query.getQueries().get(0);
        final double tokenPercent = 100 * LevenshteinTokenUtil.getSimilarity(firstQuery, sourceContents);
        if (tokenPercent > 99.99) {
            // confirm the near-perfect token match character by character
            return 100 * LevenshteinUtil.getSimilarity(firstQuery, sourceContents);
        }
        return tokenPercent;
    }

    /**
     * Maps the state of an internal translation to its TM match type.
     *
     * @param contentState
     *            state of the text flow target
     * @return ApprovedInternal for Approved, TranslatedInternal for Translated
     * @throws RuntimeException
     *             for any other state
     */
    private static TransMemoryResultItem.MatchType fromContentState(ContentState contentState) {
        final TransMemoryResultItem.MatchType mapped;
        switch (contentState) {
        case Approved:
            mapped = TransMemoryResultItem.MatchType.ApprovedInternal;
            break;

        case Translated:
            mapped = TransMemoryResultItem.MatchType.TranslatedInternal;
            break;

        default:
            // other states are excluded by the index query, so they are not expected here
            throw new RuntimeException("Cannot map content state: " + contentState);

        }
        return mapped;
    }

    /**
     * Returns the result item registered for the given source and target
     * contents, creating and registering it first when there is none yet.
     *
     * The score (match[0]), match type, percentage and version id are only
     * used when a new item is created; an existing item is returned as is.
     *
     * @return the item for the given source and target contents, which may be
     *         newly created.
     */
    private TransMemoryResultItem createOrGetResultItem(Map<TMKey, TransMemoryResultItem> matchesMap,
            Object[] match, TransMemoryResultItem.MatchType matchType, ArrayList<String> sourceContents,
            ArrayList<String> targetContents, double percent, Long fromVersionId) {
        final TMKey lookupKey = new TMKey(sourceContents, targetContents);
        final TransMemoryResultItem known = matchesMap.get(lookupKey);
        if (known != null) {
            return known;
        }

        final float relevanceScore = (Float) match[0];
        final TransMemoryResultItem created = new TransMemoryResultItem(sourceContents, targetContents,
                matchType, relevanceScore, percent, fromVersionId);
        matchesMap.put(lookupKey, created);
        return created;
    }

    /**
     * Records an imported TM unit on a result item: adds the unit's id as a
     * source id, bumps the match count and adds the slug of the unit's
     * translation memory as an origin.
     *
     * @param item
     *            result item to update
     * @param transMemoryUnit
     *            imported unit that matched
     */
    private void addTransMemoryUnitToResultMatches(TransMemoryResultItem item, TransMemoryUnit transMemoryUnit) {
        item.addSourceId(transMemoryUnit.getId());
        item.incMatchCount();
        final String originSlug = transMemoryUnit.getTranslationMemory().getSlug();
        item.addOrigin(originSlug);
    }

    /**
     * Records an internal translation on a result item: bumps the match
     * count, adds the id of its text flow as a source id and, where
     * appropriate, upgrades the item's match type.
     *
     * @param textFlowTarget
     *            internal translation that matched
     * @param item
     *            result item to update
     */
    private void addTextFlowTargetToResultMatches(HTextFlowTarget textFlowTarget, TransMemoryResultItem item) {
        item.incMatchCount();
        // TODO change sourceId to include type, then include the id of imported
        // matches
        item.addSourceId(textFlowTarget.getTextFlow().getId());

        // Workaround: Imported has no details view in the current editor, so
        // it is treated as the lowest priority and gets overwritten by any
        // internal match type; an Approved target always wins.
        // A better fix is to have the DTO hold all the match types so the
        // editor can show them in whatever way is most sensible.
        final ContentState targetState = textFlowTarget.getState();
        final boolean approved = targetState == ContentState.Approved;
        if (!approved && item.getMatchType() != TransMemoryResultItem.MatchType.Imported) {
            return;
        }
        item.setMatchType(fromContentState(targetState));
    }

    /**
     * Orders TM results by, in turn: similarity (higher first), source
     * contents (reverse element-wise string order, longer lists first on a
     * shared prefix), match type (reverse enum order) and, when the internal
     * TM source is SelectSome and both items come from a project version, the
     * position of that version in the requested version list (earlier first).
     *
     * NB just because this Comparator returns 0 doesn't mean the matches are
     * identical.
     *
     * NOTE(review): items without a version id tie with every item that is
     * otherwise equal, while two versioned items may still be ordered; worth
     * checking that this cannot upset the sort for mixed result lists.
     */
    private static class TransMemoryResultComparator implements Comparator<TransMemoryResultItem>, Serializable {

        private static final long serialVersionUID = 1L;
        private final InternalTMSource internalTMSource;

        public TransMemoryResultComparator(InternalTMSource internalTMSource) {
            this.internalTMSource = internalTMSource;
        }

        @Override
        public int compare(TransMemoryResultItem m1, TransMemoryResultItem m2) {
            // sort higher similarity first
            final int bySimilarity = Double.compare(m2.getSimilarityPercent(), m1.getSimilarityPercent());
            if (bySimilarity != 0) {
                return bySimilarity;
            }

            // sort longer string lists first (more plural forms)
            final int bySourceContents = compareContents(m2.getSourceContents(), m1.getSourceContents());
            if (bySourceContents != 0) {
                return bySourceContents;
            }

            // sort match type
            final int byMatchType = m2.getMatchType().compareTo(m1.getMatchType());
            if (byMatchType != 0) {
                return byMatchType;
            }

            // Everything compared so far is equal. The version list can only
            // break the tie when both items come from a project version
            // (getFromVersionId is null for TMX matches) and the internal TM
            // source is SelectSome.
            if (m2.getFromVersionId() == null || m1.getFromVersionId() == null
                    || internalTMSource.getChoice() != SelectSome) {
                return 0;
            }

            final List<Long> requestedVersionIds = internalTMSource.getFilteredProjectVersionIds();
            final int positionOfM2 = requestedVersionIds.indexOf(m2.getFromVersionId());
            final int positionOfM1 = requestedVersionIds.indexOf(m1.getFromVersionId());
            if (positionOfM1 >= 0 && positionOfM2 >= 0) {
                // sort higher when index is lower
                return Integer.compare(positionOfM1, positionOfM2);
            }

            // an index of -1 means something is wrong with our lucene query or index
            log.warn("Having TM result not from requested source versions:{}", requestedVersionIds);
            if (positionOfM1 < 0 && positionOfM2 >= 0) {
                // m2 rank higher since it's from the defined source versions
                return 1;
            }
            if (positionOfM2 < 0 && positionOfM1 >= 0) {
                // m1 is from defined source versions
                return -1;
            }
            // they are both not from defined source versions
            return 0;
        }

        /**
         * Compares two string lists element by element; when one list is a
         * prefix of the other, the difference in size decides.
         */
        private int compareContents(List<String> list1, List<String> list2) {
            final int sharedLength = Math.min(list1.size(), list2.size());
            for (int idx = 0; idx < sharedLength; idx++) {
                final int order = list1.get(idx).compareTo(list2.get(idx));
                if (order != 0) {
                    return order;
                }
            }
            return list1.size() - list2.size();
        }
    }

    /**
     * Map key identifying a TM result by its source and target contents; two
     * keys are equal when both content lists are equal.
     */
    private static class TMKey {
        private final List<String> textFlowContents;
        private final List<String> targetContents;

        private TMKey(List<String> textFlowContents, List<String> targetContents) {
            this.textFlowContents = textFlowContents;
            this.targetContents = targetContents;
        }

        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof TMKey)) {
                return false;
            }
            final TMKey other = (TMKey) obj;
            if (!textFlowContents.equals(other.textFlowContents)) {
                return false;
            }
            return targetContents.equals(other.targetContents);
        }

        @Override
        public int hashCode() {
            // built from the same two fields that equals() compares
            return Objects.hashCode(textFlowContents, targetContents);
        }
    }

    /**
     * Rejects query strings longer than LuceneQuery.QUERY_MAX_LENGTH.
     *
     * @param query
     *            query text to check
     * @throws RuntimeException
     *             when the query is too long; the message shows at most the
     *             first 80 characters of the query
     */
    private void validateQueryLength(String query) {
        if (StringUtils.length(query) <= LuceneQuery.QUERY_MAX_LENGTH) {
            return;
        }
        final String preview = StringUtils.left(query, 80);
        throw new RuntimeException("Query string exceed max length: " + LuceneQuery.QUERY_MAX_LENGTH + "=\'"
                + preview + "\'");
    }

    /**
     * Prepares the query text for the requested search type, builds the
     * Lucene query and executes it against the given indexed entities.
     *
     * A blank query yields an empty result for every search type. RAW text
     * is passed on as typed; FUZZY, EXACT and FUZZY_PLURAL text is escaped
     * first, and EXACT text is additionally wrapped in double quotes to form
     * a phrase query.
     *
     * @param query
     *            query text, search type and filter conditions
     * @param sourceLocale
     *            locale of the source text
     * @param targetLocale
     *            locale of the wanted translations
     * @param maxResult
     *            result cap; only applied when greater than zero
     * @param textFlowTargetId
     *            optional id of the text flow target being translated
     * @param entities
     *            indexed entity classes to search
     * @return hits as {score, entity} arrays, sorted by score and then by
     *         last change (newest first)
     * @throws ParseException
     *             when the query text cannot be parsed
     * @throws RuntimeException
     *             when the search type is unknown or the query is too long
     */
    private List<Object[]> getSearchResult(TransMemoryQuery query, LocaleId sourceLocale, LocaleId targetLocale,
            int maxResult, Optional<Long> textFlowTargetId, Class<?>... entities) throws ParseException {
        String queryText = null;
        String[] multiQueryText = null;
        switch (query.getSearchType()) {
        // 'Lucene' in the editor
        case RAW:
            queryText = query.getQueries().get(0);
            validateQueryLength(queryText);
            if (StringUtils.isBlank(queryText)) {
                return Lists.newArrayList();
            }
            break;

        // 'Fuzzy' in the editor
        case FUZZY:
            validateQueryLength(query.getQueries().get(0));
            queryText = escape(query.getQueries().get(0));
            if (StringUtils.isBlank(queryText)) {
                return Lists.newArrayList();
            }
            break;

        // 'Phrase' in the editor
        case EXACT:
            validateQueryLength(query.getQueries().get(0));
            String phraseText = escape(query.getQueries().get(0));
            // The blank check has to look at the text before it is quoted:
            // once wrapped in double quotes the string is never blank.
            if (StringUtils.isBlank(phraseText)) {
                return Lists.newArrayList();
            }
            queryText = "\"" + phraseText + "\"";
            break;

        // 'Fuzzy' in the editor, plus it is a plural entry
        case FUZZY_PLURAL:
            multiQueryText = new String[query.getQueries().size()];
            for (int i = 0; i < query.getQueries().size(); i++) {
                multiQueryText[i] = escape(query.getQueries().get(i));
                // one blank plural form makes the whole search empty
                if (StringUtils.isBlank(multiQueryText[i])) {
                    return Lists.newArrayList();
                }
            }
            break;

        // Used by copyTrans for 100% match with source string
        case CONTENT_HASH:
            queryText = query.getQueries().get(0);
            validateQueryLength(queryText);
            if (StringUtils.isBlank(queryText)) {
                return Lists.newArrayList();
            }
            break;

        default:
            throw new RuntimeException("Unknown query type: " + query.getSearchType());

        }
        // Use the TextFlowTarget and TransMemoryUnit index
        Query textQuery = generateQuery(query, sourceLocale, targetLocale, textFlowTargetId, queryText,
                multiQueryText, IndexFieldLabels.TF_CONTENT_FIELDS);
        log.debug("Executing Lucene query: {}", textQuery);
        FullTextQuery ftQuery = entityManager.createFullTextQuery(textQuery, entities);
        // each row of the result is {score, entity}
        ftQuery.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
        if (maxResult > 0) {
            ftQuery.setMaxResults(maxResult);
        }
        ftQuery.setSort(lastChangedSort);
        // the projection set above makes every row an Object[]
        @SuppressWarnings("unchecked")
        List<Object[]> resultList = (List<Object[]>) ftQuery.getResultList();
        // hitting the cap exactly suggests that matches were cut off
        if (!resultList.isEmpty() && resultList.size() == maxResult) {
            log.warn(
                    "Lucene query returned {} results (out of approx {}). Increasing {} might produce more matches.",
                    resultList.size(), ftQuery.getResultSize(), SysProperties.TM_MAX_RESULTS);
            logQueryResults(resultList);
        }
        return resultList;
    }

    /**
     * Makes user text safe for the Lucene query parser: escapes it with
     * QueryParser.escape and then wraps every AND / OR / NOT matched by
     * LUCENE_KEY_WORDS in double quotes, keeping the surrounding whitespace.
     *
     * @param string
     *            raw text
     * @return the escaped text
     */
    @VisibleForTesting
    protected static String escape(String string) {
        final String syntaxEscaped = QueryParser.escape(string);
        // $1 and $3 are the whitespace around the keyword, $2 the keyword itself
        return syntaxEscaped.replaceAll(LUCENE_KEY_WORDS, "$1\"$2\"$3");
    }

    /**
     * Logs the first hits (at most 10) of a result list at trace level; does
     * nothing when trace logging is disabled.
     *
     * @param resultList
     *            hits as {score, entity} arrays
     */
    private void logQueryResults(List<Object[]> resultList) {
        if (!log.isTraceEnabled()) {
            return;
        }
        // resultList.get() could be a little slow if resultList is a
        // LinkedList, but in practice HSearch seems to use ArrayLists,
        // plus we only iterate up to 10 elements.
        final int limit = Math.min(resultList.size(), 10);
        for (int row = 0; row < limit; row++) {
            final Object[] projection = resultList.get(row);
            final Number hitScore = (Number) projection[0];
            final Object hitEntity = projection[1];
            log.trace("{}[{}]: {}", row, hitScore, hitEntity);
        }
    }

    /**
     * Generates the query that matches the source contents in the
     * searchable indexes (HTextFlowTarget and TransMemoryUnit).
     *
     * A CONTENT_HASH search only uses the HTextFlowTarget query. For every
     * other type a TransMemoryUnit query is built as well (from the first
     * plural form for FUZZY_PLURAL) and either joined with the
     * HTextFlowTarget query or, when the internal TM source is SelectNone,
     * returned on its own.
     *
     * @param query
     *            query text, search type and filter conditions
     * @param sourceLocale
     *            locale of the source text
     * @param targetLocale
     *            locale of the wanted translations
     * @param textFlowTargetId
     *            optional id of the text flow target being translated
     * @param queryText
     *            prepared query text for single-string searches
     * @param multiQueryText
     *            prepared query texts for FUZZY_PLURAL searches
     * @param srcContentFields
     *            index fields holding the source contents
     * @return the query to execute
     * @throws ParseException
     *             when the query text cannot be parsed
     */
    private Query generateQuery(TransMemoryQuery query, LocaleId sourceLocale, LocaleId targetLocale,
            Optional<Long> textFlowTargetId, String queryText, String[] multiQueryText, String[] srcContentFields)
            throws ParseException {
        final Query internalQuery = generateTextFlowTargetQuery(query, sourceLocale, targetLocale,
                textFlowTargetId, queryText, multiQueryText, srcContentFields);
        final HasSearchType.SearchType searchType = query.getSearchType();
        if (searchType == HasSearchType.SearchType.CONTENT_HASH) {
            return internalQuery;
        }

        final String importedQueryText;
        if (searchType == HasSearchType.SearchType.FUZZY_PLURAL) {
            importedQueryText = multiQueryText[0];
        } else {
            importedQueryText = queryText;
        }
        final Query importedQuery = generateTransMemoryQuery(sourceLocale, targetLocale, importedQueryText);

        if (query.getInternalTMSource().getChoice() == SelectNone) {
            // user don't want to search from internal TM
            return importedQuery;
        }
        // Join the queries for each different type
        return join(BooleanClause.Occur.SHOULD, internalQuery, importedQuery);
    }

    /**
     * Generates the Hibernate Search Query that will search for
     * {@link HTextFlowTarget} objects for matches.
     *
     * The query requires the source content and the target locale to match,
     * applies the context conditions of {@code queryParams}, optionally
     * excludes the text flow's own translation, and always excludes targets
     * in state New, NeedReview or Rejected. When a valid text flow target id
     * is given, term queries for that target's id, project, document,
     * resource id and iteration are added with their configured boosts.
     *
     * @param queryParams
     *            query text, search type and filter conditions
     * @param sourceLocale
     *            locale of the source text
     * @param targetLocale
     *            locale of the wanted translations
     * @param textFlowTargetId
     *            optional id of the text flow target being translated; an id
     *            that cannot be found is logged and ignored
     * @param queryText
     *            prepared query text for single-string searches
     * @param multiQueryText
     *            prepared query texts for FUZZY_PLURAL searches
     * @param srcContentFields
     *            index fields holding the source contents
     * @return the query for the HTextFlowTarget index
     * @throws ParseException
     *             when the query text cannot be parsed
     */
    private Query generateTextFlowTargetQuery(TransMemoryQuery queryParams, LocaleId sourceLocale,
            LocaleId targetLocale, Optional<Long> textFlowTargetId, String queryText, String[] multiQueryText,
            String[] srcContentFields) throws ParseException {
        Query contentQuery = buildContentQuery(queryParams, sourceLocale, queryText, multiQueryText,
                srcContentFields);
        contentQuery.setBoost(BOOST_CONTENT);
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(contentQuery, BooleanClause.Occur.MUST);
        if (textFlowTargetId.isPresent()) {
            // Unwrap once. Formatting the Optional itself would produce
            // "Optional[<id>]", a term that can never match the indexed id.
            Long tftId = textFlowTargetId.get();
            HTextFlowTarget tft = entityManager.find(HTextFlowTarget.class, tftId);
            if (tft != null) {
                HTextFlow tf = tft.getTextFlow();
                HDocument doc = tf.getDocument();
                HProjectIteration iter = doc.getProjectIteration();
                HProject proj = iter.getProject();
                addTermQueryWithBoost(queryBuilder, "id", String.valueOf(tftId), BOOST_TFTID);
                addTermQueryWithBoost(queryBuilder, "project", proj.getSlug(), BOOST_PROJECT);
                addTermQueryWithBoost(queryBuilder, "documentId", doc.getDocId(), BOOST_DOCID);
                addTermQueryWithBoost(queryBuilder, "textFlow.resId", tf.getResId(), BOOST_RESID);
                addTermQueryWithBoost(queryBuilder, "iteration", iter.getSlug(), BOOST_ITERATION);
                // TODO add projiterslug to the index, replacing iteration slug
                // String projIterSlug = proj.getSlug()+iter.getSlug();
                // addTermQueryWithBoost(query, "projIterSlug", projIterSlug,
                // BOOST_PROJITERSLUG);
            } else {
                log.warn("Ignoring invalid textFlowTargetId: {}", tftId);
            }
        }
        TermQuery localeQuery = new TermQuery(new Term(IndexFieldLabels.LOCALE_ID_FIELD, targetLocale.getId()));
        queryBuilder.add(localeQuery, BooleanClause.Occur.MUST);
        buildContextQuery(queryBuilder, queryParams);
        // exclude own translation
        if (!queryParams.getIncludeOwnTranslation().isCheck()) {
            TermQuery tmIdQuery = new TermQuery(
                    new Term(IndexFieldLabels.TF_ID, queryParams.getIncludeOwnTranslation().getValue()));
            queryBuilder.add(tmIdQuery, BooleanClause.Occur.MUST_NOT);
        }
        // only finished translations may be offered as TM matches
        queryBuilder.add(newStateQuery, BooleanClause.Occur.MUST_NOT);
        queryBuilder.add(needReviewStateQuery, BooleanClause.Occur.MUST_NOT);
        queryBuilder.add(rejectedStateQuery, BooleanClause.Occur.MUST_NOT);
        return queryBuilder.build();
    }

    /**
     * Adds an optional ({@code SHOULD}) term clause with the given boost to
     * the builder.
     *
     * @param builder
     *            the boolean query builder receiving the clause
     * @param fld
     *            name of the index field
     * @param txt
     *            exact term text to look for in the field
     * @param boost
     *            boost applied to the term query
     */
    private static void addTermQueryWithBoost(BooleanQuery.Builder builder, String fld, String txt, float boost) {
        Term term = new Term(fld, txt);
        TermQuery boostedQuery = new TermQuery(term);
        boostedQuery.setBoost(boost);
        builder.add(boostedQuery, BooleanClause.Occur.SHOULD);
    }

    /**
     * Adds project, document and resId context clauses to the given builder.
     *
     * Each condition that is present on {@code queryParams} contributes one
     * term clause: {@code MUST} when the condition is checked, otherwise
     * {@code SHOULD}. When the internal TM source choice is
     * {@code SelectSome}, a further {@code MUST} clause is added that
     * matches any of the filtered project version ids.
     *
     * @param builder
     *            the boolean query builder receiving the clauses
     * @param queryParams
     *            the query parameters holding the context conditions
     */
    private void buildContextQuery(BooleanQuery.Builder builder, TransMemoryQuery queryParams) {
        if (queryParams.getProject() != null) {
            Term projectTerm = new Term(IndexFieldLabels.PROJECT_ID_FIELD, queryParams.getProject().getValue());
            BooleanClause.Occur projectOccur = queryParams.getProject().isCheck() ? BooleanClause.Occur.MUST
                    : BooleanClause.Occur.SHOULD;
            builder.add(new TermQuery(projectTerm), projectOccur);
        }
        if (queryParams.getDocument() != null) {
            Term documentTerm = new Term(IndexFieldLabels.DOCUMENT_ID_FIELD,
                    queryParams.getDocument().getValue());
            BooleanClause.Occur documentOccur = queryParams.getDocument().isCheck() ? BooleanClause.Occur.MUST
                    : BooleanClause.Occur.SHOULD;
            builder.add(new TermQuery(documentTerm), documentOccur);
        }
        if (queryParams.getRes() != null) {
            Term resIdTerm = new Term(IndexFieldLabels.TF_RES_ID, queryParams.getRes().getValue());
            BooleanClause.Occur resIdOccur = queryParams.getRes().isCheck() ? BooleanClause.Occur.MUST
                    : BooleanClause.Occur.SHOULD;
            builder.add(new TermQuery(resIdTerm), resIdOccur);
        }
        if (queryParams.getInternalTMSource().getChoice() != SelectSome) {
            return;
        }
        // Restrict to the selected versions: at least one version id must match.
        BooleanQuery.Builder anyVersion = new BooleanQuery.Builder();
        queryParams.getInternalTMSource().getFilteredProjectVersionIds()
                .forEach(versionId -> anyVersion.add(
                        new TermQuery(new Term(IndexFieldLabels.PROJECT_VERSION_ID_FIELD, versionId.toString())),
                        BooleanClause.Occur.SHOULD));
        builder.add(anyVersion.build(), BooleanClause.Occur.MUST);
    }

    /**
     * Builds the query that matches source content.
     *
     * For {@code CONTENT_HASH} searches this is an exact term query on the
     * content hash. For {@code FUZZY_PLURAL} searches each entry of
     * {@code multiQueryText} is parsed against the source content field at
     * the same position. For all other search types {@code queryText} is
     * parsed against all source content fields.
     *
     * @param query
     *            the query parameters; only the search type is read here
     * @param sourceLocale
     *            locale of the source text; selects the analyzer
     * @param queryText
     *            query text (or content hash) for non-plural searches
     * @param multiQueryText
     *            query texts for plural searches; entries beyond the number
     *            of indexed fields are ignored with a warning
     * @param srcContentFields
     *            names of the indexed source content fields
     * @return the content query
     * @throws ParseException
     *             if the query text cannot be parsed
     */
    private Query buildContentQuery(TransMemoryQuery query, LocaleId sourceLocale, String queryText,
            String[] multiQueryText, String[] srcContentFields) throws ParseException {
        if (query.getSearchType() == HasSearchType.SearchType.CONTENT_HASH) {
            return new TermQuery(new Term(IndexFieldLabels.TF_CONTENT_HASH, queryText));
        }
        // Analyzer is determined by the source language,
        // because we are querying the source text.
        String analyzerDefName = TextContainerAnalyzerDiscriminator
                .getAnalyzerDefinitionName(sourceLocale.getId());
        Analyzer sourceAnalyzer = entityManager.getSearchFactory().getAnalyzer(analyzerDefName);
        if (query.getSearchType() != HasSearchType.SearchType.FUZZY_PLURAL) {
            MultiFieldQueryParser parser = new MultiFieldQueryParser(srcContentFields, sourceAnalyzer);
            return parser.parse(queryText);
        }
        int queriesSize = multiQueryText.length;
        // Never read past the indexed fields: copying queriesSize elements
        // out of a shorter srcContentFields would throw.
        int usableSize = Math.min(queriesSize, srcContentFields.length);
        if (queriesSize > srcContentFields.length) {
            log.warn("query contains {} fields, but we only index {}", queriesSize,
                    srcContentFields.length);
        }
        // MultiFieldQueryParser.parse requires queries and fields of equal
        // length, so both arrays are trimmed to the common size.
        String[] searchQueries = new String[usableSize];
        System.arraycopy(multiQueryText, 0, searchQueries, 0, usableSize);
        String[] searchFields = new String[usableSize];
        System.arraycopy(srcContentFields, 0, searchFields, 0, usableSize);
        return MultiFieldQueryParser.parse(searchQueries, searchFields, sourceAnalyzer);
    }

    /**
     * Generates the Hibernate Search Query that will search for
     * {@link org.zanata.model.tm.TransMemoryUnit} objects for matches.
     *
     * A match must satisfy the parsed query text in the source-locale
     * variant field and must have any term at all in the target-locale
     * variant field.
     *
     * @param sourceLocale
     *            locale of the source text; selects the analyzer and the
     *            source variant field
     * @param targetLocale
     *            locale whose variant field must be present
     * @param queryText
     *            the text to parse into the source content query
     * @return the combined query
     * @throws ParseException
     *             if {@code queryText} cannot be parsed
     */
    private Query generateTransMemoryQuery(LocaleId sourceLocale, LocaleId targetLocale, String queryText)
            throws ParseException {
        // The analyzer follows the source language, since the source text is what gets parsed.
        Analyzer sourceAnalyzer = entityManager.getSearchFactory()
                .getAnalyzer(TextContainerAnalyzerDiscriminator.getAnalyzerDefinitionName(sourceLocale.getId()));
        String sourceField = IndexFieldLabels.TRANS_UNIT_VARIANT_FIELD + sourceLocale.getId();
        Query sourceMatches = new QueryParser(sourceField, sourceAnalyzer).parse(queryText);
        Term anyTargetTerm = new Term(IndexFieldLabels.TRANS_UNIT_VARIANT_FIELD + targetLocale.getId(), "*");
        WildcardQuery targetExists = new WildcardQuery(anyTargetTerm);
        return join(BooleanClause.Occur.MUST, sourceMatches, targetExists);
    }

    /**
     * Combines the given queries into one boolean query in which every
     * sub-query carries the same occurrence condition.
     *
     * @param condition
     *            The occurrence condition applied to each joined query.
     * @param queries
     *            The queries to be joined, added in the order given.
     * @return A single query that evaluates all the given sub-queries using
     *         the given occurrence condition.
     */
    private static Query join(BooleanClause.Occur condition, Query... queries) {
        BooleanQuery.Builder combined = new BooleanQuery.Builder();
        for (int i = 0; i < queries.length; i++) {
            combined.add(queries[i], condition);
        }
        return combined.build();
    }

    /**
     * Accepts translation memory results whose similarity percentage,
     * truncated to an int, is at least the configured threshold. Null
     * results are rejected.
     */
    private static final class TransMemoryAboveThresholdPredicate implements Predicate<TransMemoryResultItem> {
        private final int approvedThreshold;

        public TransMemoryAboveThresholdPredicate(int approvedThreshold) {
            this.approvedThreshold = approvedThreshold;
        }

        @Override
        public boolean apply(TransMemoryResultItem tmResult) {
            if (tmResult == null) {
                return false;
            }
            int truncatedSimilarity = (int) tmResult.getSimilarityPercent();
            return truncatedSimilarity >= approvedThreshold;
        }
    }

    /**
     * Filters query result rows, where the entity is read from index 1 of
     * each row.
     *
     * Text flow targets are rejected when their locale differs from the
     * expected one, when their state is not translated, or when their
     * project iteration or project is obsolete. Trans memory units are
     * rejected when they have no variant for the expected locale. Null
     * entities are rejected. Entities of any other type are logged and
     * accepted.
     */
    private static class ValidTargetFilterPredicate implements Predicate<Object[]> {
        private final LocaleId localeId;

        public ValidTargetFilterPredicate(LocaleId localeId) {
            this.localeId = localeId;
        }

        @Override
        public boolean apply(Object[] input) {
            Object entity = input[1];
            if (entity == null) {
                log.error("Query results include null entity. You may need to re-index.");
                return false;
            }
            if (entity instanceof HTextFlowTarget) {
                return acceptTextFlowTarget((HTextFlowTarget) entity);
            }
            if (entity instanceof TransMemoryUnit) {
                return acceptTransMemoryUnit((TransMemoryUnit) entity);
            }
            log.warn("Unexpected query result of type {}: {}. You may need to re-index.",
                    entity.getClass().getName(), entity);
            return true;
        }

        /**
         * Checks locale, state and obsolescence of a text flow target.
         */
        private boolean acceptTextFlowTarget(HTextFlowTarget target) {
            if (!target.getLocaleId().equals(localeId)) {
                log.error("Unexpected TextFlowTarget (locale {}): {}. You may need to re-index.",
                        target.getLocaleId(), target);
                return false;
            }
            if (!target.getState().isTranslated()) {
                log.error("Unexpected TextFlowTarget (state {}): {}. You may need to re-index.",
                        target.getState(), target);
                return false;
            }
            HProjectIteration version = target.getTextFlow().getDocument().getProjectIteration();
            if (version.getStatus() == EntityStatus.OBSOLETE) {
                log.debug("Discarding TextFlowTarget (obsolete iteration {}): {}", version, target);
                return false;
            }
            if (version.getProject().getStatus() == EntityStatus.OBSOLETE) {
                log.debug("Discarding TextFlowTarget (obsolete project {}): {}", version.getProject(),
                        target);
                return false;
            }
            return true;
        }

        /**
         * Checks that a trans memory unit has a variant for the expected
         * locale.
         */
        private boolean acceptTransMemoryUnit(TransMemoryUnit tmu) {
            if (tmu.getTransUnitVariants().containsKey(localeId.getId())) {
                return true;
            }
            log.error("Unexpected TransMemoryUnit (no TUV in locale {}): {}. You may need to re-index.",
                    localeId.getId(), tmu);
            return false;
        }
    }

    /**
     * Responsible for running a query and collating the results.
     *
     * I am using a class to avoid having to pass several arguments through all
     * the helper methods, since that makes the code very hard to read.
     *
     * Rows with identical source and target contents are collated into a
     * single {@link Suggestion}, with one detail item per row.
     */
    private class QueryMatchProcessor {
        private final TransMemoryQuery query;
        private final LocaleId srcLocale;
        private final LocaleId transLocale;
        private final Optional<Long> textFlowTargetId;
        private final Map<TMKey, Suggestion> suggestions;
        private boolean processed;

        public QueryMatchProcessor(TransMemoryQuery query, LocaleId srcLocale, LocaleId transLocale,
                Optional<Long> textFlowTargetId) {
            this.query = query;
            this.srcLocale = srcLocale;
            this.transLocale = transLocale;
            this.textFlowTargetId = textFlowTargetId;
            // Insertion-ordered, so suggestions come back in the order in
            // which their first row was returned by the query rather than in
            // arbitrary hash order.
            suggestions = new LinkedHashMap<>();
            processed = false;
        }

        /**
         * Run the query, process and collate the results.
         *
         * Results are cached, so subsequent calls will return cached results
         * without running the query again.
         *
         * @return the collated results of the query, as a new list.
         */
        public List<Suggestion> process() {
            if (!processed) {
                runQueryAndCacheSuggestions();
            }
            return new ArrayList<>(suggestions.values());
        }

        /**
         * When this has run, suggestions contains all the results of the query.
         */
        private void runQueryAndCacheSuggestions() {
            for (Object[] resultRow : runQuery()) {
                processResultRow(resultRow);
            }
            processed = true;
        }

        /**
         * Convert a result row to a match (if possible) then process the match.
         *
         * If the row does not contain an appropriate entity, or the entity
         * lacks the contents needed to build a match, an error is logged and
         * the row is skipped.
         *
         * @param resultRow
         *            in the form [Float score, Object entity]
         */
        private void processResultRow(Object[] resultRow) {
            try {
                final QueryMatch match = fromResultRow(resultRow);
                processMatch(match);
            } catch (IllegalArgumentException e) {
                log.error("Skipped result row because it does not contain an expected entity type: {}", resultRow,
                        e);
            }
        }

        /**
         * Run the full-text query.
         *
         * @return collection of [float, entity] where float is the match score
         *         and entity is a HTextFlowTarget or TransMemoryUnit.
         */
        private Collection<Object[]> runQuery() {
            return findMatchingTranslation(transLocale, srcLocale, query, SEARCH_MAX_RESULTS, textFlowTargetId,
                    HTextFlowTarget.class, TransMemoryUnit.class);
        }

        /**
         * Ensure there is a suggestion item for a match row and add a detail
         * item to the suggestion.
         *
         * Note: this updates this.suggestions
         *
         * @param match
         *            the row to add
         */
        private void processMatch(QueryMatch match) {
            Suggestion suggestion = suggestions.computeIfAbsent(match.getKey(), key -> createSuggestion(match));
            suggestion.getMatchDetails().add(match.createDetails());
        }

        /**
         * Generate and return a suggestion object for the given match.
         *
         * @param match
         *            providing the contents and score for the suggestion
         * @return the created suggestion object
         */
        private Suggestion createSuggestion(QueryMatch match) {
            double similarity = calculateSimilarityPercentage(query, match.getSourceContents());
            return new Suggestion(match.getScore(), similarity, match.getSourceContents(),
                    match.getTargetContents());
        }

        /**
         * Wrap a result row in the match type appropriate for its entity.
         *
         * @param match
         *            in the form [Float score, Object entity]
         * @return the match wrapping the row's entity
         * @throws IllegalArgumentException
         *             if the entity is neither a HTextFlowTarget nor a
         *             TransMemoryUnit, or is a TransMemoryUnit without a
         *             variant for the source or target locale
         */
        private QueryMatch fromResultRow(Object[] match) {
            // matches are [Float score, Object entity], see #runQuery()
            float score = (Float) match[0];
            Object entity = match[1];
            if (entity instanceof HTextFlowTarget) {
                return new TextFlowTargetQueryMatch(score, (HTextFlowTarget) entity);
            }
            if (entity instanceof TransMemoryUnit) {
                return new TransMemoryUnitQueryMatch(score, (TransMemoryUnit) entity);
            }
            throw new IllegalArgumentException(
                    "Result type must be TextFlowTarget or TransMemoryUnit, but was neither");
        }

        /**
         * Represents a single row of results from a full-text query,
         * abstracting the type of entity returned in the row.
         */
        private abstract class QueryMatch {
            private final float score;

            protected QueryMatch(float score) {
                this.score = score;
            }

            public TMKey getKey() {
                return new TMKey(getSourceContents(), getTargetContents());
            }

            public abstract List<String> getSourceContents();

            public abstract List<String> getTargetContents();

            public abstract SuggestionDetail createDetails();

            public float getScore() {
                return this.score;
            }
        }

        /**
         * Represents a single row of results containing a text flow target.
         */
        private class TextFlowTargetQueryMatch extends QueryMatch {
            private final List<String> sourceContents;
            private final List<String> targetContents;
            private final HTextFlowTarget target;

            public TextFlowTargetQueryMatch(float score, HTextFlowTarget textFlowTarget) {
                super(score);
                target = textFlowTarget;
                sourceContents = Lists.newArrayList(textFlowTarget.getTextFlow().getContents());
                targetContents = Lists.newArrayList(textFlowTarget.getContents());
            }

            @Override
            public SuggestionDetail createDetails() {
                return new TextFlowSuggestionDetail(target);
            }

            @Override
            public List<String> getSourceContents() {
                return this.sourceContents;
            }

            @Override
            public List<String> getTargetContents() {
                return this.targetContents;
            }
        }

        /**
         * Represents a single row of results containing a trans memory unit.
         */
        private class TransMemoryUnitQueryMatch extends QueryMatch {
            private final List<String> sourceContents;
            private final List<String> targetContents;
            private final TransMemoryUnit tmUnit;

            public TransMemoryUnitQueryMatch(float score, TransMemoryUnit transMemoryUnit) {
                super(score);
                tmUnit = transMemoryUnit;
                sourceContents = getContents(srcLocale);
                targetContents = getContents(transLocale);
            }

            /**
             * Read the plain text segment of the unit's variant for a locale.
             *
             * @param locale
             *            the locale whose variant is wanted
             * @return a single-element list holding the variant's plain text
             * @throws IllegalArgumentException
             *             if the unit has no variant for the locale, so that
             *             the row is skipped instead of failing the whole
             *             query with a NullPointerException
             */
            private List<String> getContents(LocaleId locale) {
                String localeKey = locale.getId();
                if (tmUnit.getTransUnitVariants().get(localeKey) == null) {
                    throw new IllegalArgumentException(
                            "TransMemoryUnit has no variant for locale " + localeKey + ": " + tmUnit);
                }
                return Lists.newArrayList(tmUnit.getTransUnitVariants().get(localeKey).getPlainTextSegment());
            }

            @Override
            public SuggestionDetail createDetails() {
                return new TransMemoryUnitSuggestionDetail(tmUnit);
            }

            @Override
            public List<String> getSourceContents() {
                return this.sourceContents;
            }

            @Override
            public List<String> getTargetContents() {
                return this.targetContents;
            }
        }
    }
}