com.amalto.core.storage.hibernate.LuceneQueryGenerator.java Source code

Java tutorial

Introduction

Here is the source code for com.amalto.core.storage.hibernate.LuceneQueryGenerator.java

Source

/*
 * Copyright (C) 2006-2016 Talend Inc. - www.talend.com
 * 
 * This source code is available under agreement available at
 * %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
 * 
 * You should have received a copy of the agreement along with this program; if not, write to Talend SA 9 rue Pages
 * 92150 Suresnes, France
 */

package com.amalto.core.storage.hibernate;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.talend.mdm.commmon.metadata.ComplexTypeMetadata;
import org.talend.mdm.commmon.metadata.ContainedComplexTypeMetadata;
import org.talend.mdm.commmon.metadata.DefaultMetadataVisitor;
import org.talend.mdm.commmon.metadata.EnumerationFieldMetadata;
import org.talend.mdm.commmon.metadata.ReferenceFieldMetadata;
import org.talend.mdm.commmon.metadata.SimpleTypeFieldMetadata;

import com.amalto.core.query.user.Alias;
import com.amalto.core.query.user.BigDecimalConstant;
import com.amalto.core.query.user.BinaryLogicOperator;
import com.amalto.core.query.user.BooleanConstant;
import com.amalto.core.query.user.ByteConstant;
import com.amalto.core.query.user.Compare;
import com.amalto.core.query.user.DateConstant;
import com.amalto.core.query.user.DateTimeConstant;
import com.amalto.core.query.user.DoubleConstant;
import com.amalto.core.query.user.Expression;
import com.amalto.core.query.user.Field;
import com.amalto.core.query.user.FieldFullText;
import com.amalto.core.query.user.FloatConstant;
import com.amalto.core.query.user.FullText;
import com.amalto.core.query.user.IntegerConstant;
import com.amalto.core.query.user.LongConstant;
import com.amalto.core.query.user.Predicate;
import com.amalto.core.query.user.Range;
import com.amalto.core.query.user.ShortConstant;
import com.amalto.core.query.user.StringConstant;
import com.amalto.core.query.user.TimeConstant;
import com.amalto.core.query.user.UnaryLogicOperator;
import com.amalto.core.query.user.VisitorAdapter;
import com.amalto.core.query.user.metadata.MetadataField;
import com.amalto.core.query.user.metadata.StagingBlockKey;
import com.amalto.core.query.user.metadata.StagingError;
import com.amalto.core.query.user.metadata.StagingSource;
import com.amalto.core.query.user.metadata.StagingStatus;
import com.amalto.core.query.user.metadata.TaskId;
import com.amalto.core.query.user.metadata.Timestamp;
import com.amalto.core.storage.Storage;
import com.amalto.core.storage.StorageMetadataUtils;
import com.amalto.core.storage.exception.UnsupportedFullTextQueryException;

/**
 * Visitor that translates a user query condition into a Lucene {@link Query}.
 * <p>
 * Leaf visits (fields, metadata fields, aliases and constants) do not build a query: they record the visited field
 * name in {@link #currentFieldName} or the visited value in {@link #currentValue} and return <code>null</code>.
 * Condition visits ({@link Compare}, {@link Range}, logic operators, full text) consume that state and return the
 * actual Lucene query.
 * </p>
 * <p>
 * The visit state is kept in plain instance fields, so a single instance should not be shared between threads.
 * </p>
 */
class LuceneQueryGenerator extends VisitorAdapter<Query> {

    /**
     * Characters replaced by a blank before a full text value is handed over to the Lucene query parser.
     */
    private static final char[] RESERVED_CHARACTERS = { '[', ']', '+', '!', '(', ')', '^', '\"', '~', ':', ';', '\\',
            '-', '@', '#', '$', '%', '&', '=', ',', '.', '<', '>' };

    /** Types whose fields are scanned when building a full text query that is not bound to a field. */
    private final Collection<ComplexTypeMetadata> types;

    /** Name of the last visited field (set by the field / metadata field / alias visits). */
    private String currentFieldName;

    /** Value of the last visited constant. */
    private Object currentValue;

    /** <code>true</code> while the condition nested in a NOT operator is being visited. */
    private boolean isBuildingNot;

    /**
     * @param types The types to look into for searchable fields when visiting a {@link FullText} condition.
     */
    LuceneQueryGenerator(Collection<ComplexTypeMetadata> types) {
        this.types = types;
    }

    /**
     * Builds a term based query for EQUALS, CONTAINS and STARTS_WITH comparisons.
     * <ul>
     * <li>A value enclosed in single quotes becomes a {@link PhraseQuery} made of its whitespace separated, lower
     * cased tokens.</li>
     * <li>Any other value becomes one {@link TermQuery} per lower cased token (only the first token for
     * STARTS_WITH). Each term is required, or prohibited when a NOT is being built.</li>
     * </ul>
     *
     * @param condition The comparison to translate.
     * @return A {@link BooleanQuery} wrapping the generated clauses.
     * @throws RuntimeException If the predicate is one of the greater / lower than predicates.
     * @throws NotImplementedException For any other predicate.
     */
    @Override
    public Query visit(Compare condition) {
        // Visiting both sides stores the field name and the value to compare with in this visitor's state.
        condition.getLeft().accept(this);
        Expression right = condition.getRight();
        right.accept(this);
        Predicate predicate = condition.getPredicate();
        if (predicate == Predicate.EQUALS || predicate == Predicate.CONTAINS || predicate == Predicate.STARTS_WITH) {
            // String.valueOf() never returns null (a null value is rendered as "null").
            String searchValue = String.valueOf(currentValue);
            BooleanQuery termQuery = new BooleanQuery();
            // Length check: a value made of one single quote is not a quoted phrase (and can't be unquoted).
            if (searchValue.length() > 1 && searchValue.startsWith("'") && searchValue.endsWith("'")) { //$NON-NLS-1$ //$NON-NLS-2$
                PhraseQuery phrase = new PhraseQuery();
                String unquoted = searchValue.substring(1, searchValue.length() - 1);
                StringTokenizer tokenizer = new StringTokenizer(unquoted);
                while (tokenizer.hasMoreTokens()) {
                    phrase.add(new Term(currentFieldName, tokenizer.nextToken().toLowerCase()));
                }
                // NOTE(review): the phrase is always optional, even when a NOT is being built - confirm intent.
                termQuery.add(phrase, BooleanClause.Occur.SHOULD);
            } else {
                BooleanClause.Occur occur = isBuildingNot ? BooleanClause.Occur.MUST_NOT : BooleanClause.Occur.MUST;
                StringTokenizer tokenizer = new StringTokenizer(searchValue);
                while (tokenizer.hasMoreTokens()) {
                    Term term = new Term(currentFieldName, tokenizer.nextToken().toLowerCase());
                    termQuery.add(new TermQuery(term), occur);
                    if (predicate == Predicate.STARTS_WITH) {
                        // Only the first token is considered for "starts with".
                        break;
                    }
                }
            }
            return termQuery;
        } else if (predicate == Predicate.GREATER_THAN || predicate == Predicate.GREATER_THAN_OR_EQUALS
                || predicate == Predicate.LOWER_THAN || predicate == Predicate.LOWER_THAN_OR_EQUALS) {
            throw new RuntimeException("Greater than, less than are not supported in full text searches."); //$NON-NLS-1$
        } else {
            throw new NotImplementedException("No support for predicate '" + predicate + "'"); //$NON-NLS-1$ //$NON-NLS-2$
        }
    }

    /**
     * Combines the queries of both operands of an OR / AND operator in a {@link BooleanQuery}.
     * <p>
     * For AND, an operand that contains a prohibited clause (see {@link #isNotQuery(Query)}) is added as optional
     * instead of required.
     * </p>
     *
     * @param condition The binary condition to translate.
     * @return The combined query.
     * @throws NotImplementedException If the predicate is neither OR nor AND.
     */
    @Override
    public Query visit(BinaryLogicOperator condition) {
        Query left = condition.getLeft().accept(this);
        Query right = condition.getRight().accept(this);
        Predicate predicate = condition.getPredicate();
        BooleanQuery query = new BooleanQuery();
        if (predicate == Predicate.OR) {
            query.add(left, BooleanClause.Occur.SHOULD);
            query.add(right, BooleanClause.Occur.SHOULD);
        } else if (predicate == Predicate.AND) {
            query.add(left, isNotQuery(left) ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST);
            query.add(right, isNotQuery(right) ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST);
        } else {
            throw new NotImplementedException("No support for '" + predicate + "'."); //$NON-NLS-1$ //$NON-NLS-2$
        }
        return query;
    }

    /**
     * @param query A Lucene query.
     * @return <code>true</code> if <code>query</code> is a {@link BooleanQuery} with at least one direct clause
     * marked as {@link BooleanClause.Occur#MUST_NOT}, <code>false</code> otherwise.
     */
    private static boolean isNotQuery(Query query) {
        if (query instanceof BooleanQuery) {
            for (BooleanClause clause : ((BooleanQuery) query).getClauses()) {
                if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Visits the condition nested in a NOT operator while {@link #isBuildingNot} is set, so nested comparisons
     * generate prohibited clauses.
     *
     * @param condition The unary condition to translate.
     * @return The query built for the nested condition.
     * @throws NotImplementedException If the predicate is not NOT.
     */
    @Override
    public Query visit(UnaryLogicOperator condition) {
        if (condition.getPredicate() != Predicate.NOT) {
            throw new NotImplementedException("No support for predicate '" + condition.getPredicate() + "'."); //$NON-NLS-1$//$NON-NLS-2$
        }
        isBuildingNot = true;
        try {
            return condition.getCondition().accept(this);
        } finally {
            // Reset even if the nested visit fails, so the flag can't leak into later visits of this instance.
            isBuildingNot = false;
        }
    }

    /**
     * Builds an inclusive numeric (long) range query on the current field.
     * <p>
     * A start equals to {@link Long#MIN_VALUE} or an end equals to {@link Long#MAX_VALUE} is passed as
     * <code>null</code> bound to the range query. Both range values are cast to {@link Long}.
     * </p>
     *
     * @param range The range condition to translate.
     * @return A {@link NumericRangeQuery} on {@link #currentFieldName}.
     */
    @Override
    public Query visit(Range range) {
        // NOTE(review): when the expression is not a MetadataField, currentFieldName is left as set by a previous
        // visit - confirm whether other expressions can reach this method.
        if (range.getExpression() instanceof MetadataField) {
            if (range.getExpression() instanceof Timestamp) {
                Timestamp field = (Timestamp) range.getExpression();
                field.accept(this);
            } else {
                MetadataField field = (MetadataField) range.getExpression();
                field.getProjectionExpression().accept(this);
            }
        }
        range.getStart().accept(this);
        Long start = (Long) currentValue;
        Long currentRangeStart = start == Long.MIN_VALUE ? null : start;
        range.getEnd().accept(this);
        Long end = (Long) currentValue;
        Long currentRangeEnd = end == Long.MAX_VALUE ? null : end;
        return NumericRangeQuery.newLongRange(currentFieldName, currentRangeStart, currentRangeEnd, true, true);
    }

    // Metadata fields: only record the name of the storage field to query.

    @Override
    public Query visit(Timestamp timestamp) {
        currentFieldName = Storage.METADATA_TIMESTAMP;
        return null;
    }

    @Override
    public Query visit(StagingStatus stagingStatus) {
        currentFieldName = Storage.METADATA_STAGING_STATUS;
        return null;
    }

    @Override
    public Query visit(TaskId taskId) {
        currentFieldName = Storage.METADATA_TASK_ID;
        return null;
    }

    @Override
    public Query visit(StagingError stagingError) {
        currentFieldName = Storage.METADATA_STAGING_ERROR;
        return null;
    }

    @Override
    public Query visit(StagingSource stagingSource) {
        currentFieldName = Storage.METADATA_STAGING_SOURCE;
        return null;
    }

    @Override
    public Query visit(StagingBlockKey stagingBlockKey) {
        currentFieldName = Storage.METADATA_STAGING_BLOCK_KEY;
        return null;
    }

    // Fields and aliases: only record the name to use in the generated query.

    @Override
    public Query visit(Field field) {
        currentFieldName = field.getFieldMetadata().getName();
        return null;
    }

    @Override
    public Query visit(Alias alias) {
        currentFieldName = alias.getAliasName();
        return null;
    }

    // Constants: only record the value, the enclosing condition decides how to use it.

    @Override
    public Query visit(StringConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(IntegerConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(DateConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(DateTimeConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(BooleanConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(BigDecimalConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(TimeConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(ShortConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(ByteConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(LongConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(DoubleConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    @Override
    public Query visit(FloatConstant constant) {
        currentValue = constant.getValue();
        return null;
    }

    /**
     * Builds a full text query over all the fields of {@link #types} able to hold the searched value.
     * <p>
     * Fields are collected by walking each type (including contained sub types and referenced types that are not
     * instantiable). The searched text is then OR-ed over the collected fields and parsed by Lucene; key fields are
     * searched through their {@link ToLowerCaseFieldBridge#ID_POSTFIX} variant, plus a {@link PrefixQuery} on the
     * raw value.
     * </p>
     *
     * @param fullText The full text condition to translate.
     * @return A {@link BooleanQuery} where the id prefix query (if any) and the parsed query are both optional.
     * @throws UnsupportedFullTextQueryException If Lucene can't parse the generated query.
     */
    @Override
    public Query visit(final FullText fullText) {
        // TODO Test me on conditions where many types share same field names.
        // Field name -> is key field?
        final Map<String, Boolean> fieldsMap = new HashMap<String, Boolean>();
        for (final ComplexTypeMetadata type : types) {
            type.accept(new DefaultMetadataVisitor<Void>() {

                @Override
                public Void visit(ContainedComplexTypeMetadata containedType) {
                    super.visit(containedType);
                    for (ComplexTypeMetadata subType : containedType.getSubTypes()) {
                        subType.accept(this);
                    }
                    return null;
                }

                @Override
                public Void visit(ReferenceFieldMetadata referenceField) {
                    ComplexTypeMetadata referencedType = referenceField.getReferencedType();
                    if (!referencedType.isInstantiable()) {
                        referencedType.accept(this);
                    }
                    return null;
                }

                @Override
                public Void visit(SimpleTypeFieldMetadata simpleField) {
                    // Timestamp and task id metadata fields are left out of full text searches.
                    if (!Storage.METADATA_TIMESTAMP.equals(simpleField.getName())
                            && !Storage.METADATA_TASK_ID.equals(simpleField.getName())) {
                        if (StorageMetadataUtils.isValueSearchable(fullText.getValue(), simpleField)) {
                            fieldsMap.put(simpleField.getName(), simpleField.isKey());
                        }
                    }
                    return null;
                }

                @Override
                public Void visit(EnumerationFieldMetadata enumField) {
                    if (StorageMetadataUtils.isValueAssignable(fullText.getValue(), enumField)) {
                        fieldsMap.put(enumField.getName(), enumField.isKey());
                    }
                    return null;
                }
            });
        }

        String[] fieldsAsArray = fieldsMap.keySet().toArray(new String[fieldsMap.size()]);
        StringBuilder queryBuffer = new StringBuilder();
        Iterator<Map.Entry<String, Boolean>> fieldsIterator = fieldsMap.entrySet().iterator();
        String fullTextValue = getFullTextValue(fullText);
        BooleanQuery query = new BooleanQuery();
        Query idQuery = null;
        while (fieldsIterator.hasNext()) {
            Map.Entry<String, Boolean> next = fieldsIterator.next();
            if (next.getValue()) {
                queryBuffer.append(next.getKey()).append(ToLowerCaseFieldBridge.ID_POSTFIX + ':')
                        .append(fullTextValue);
                // NOTE(review): with several key fields only the last one keeps its prefix query - confirm intent.
                idQuery = new PrefixQuery(new Term(next.getKey(), fullText.getValue()));
            } else {
                queryBuffer.append(next.getKey()).append(':').append(fullTextValue);
            }
            if (fieldsIterator.hasNext()) {
                queryBuffer.append(" OR "); //$NON-NLS-1$
            }
        }

        String fullTextQuery = queryBuffer.toString();
        if (idQuery != null) {
            query.add(idQuery, BooleanClause.Occur.SHOULD);
        }
        query.add(parseQuery(fieldsAsArray, fullTextQuery, fullText.getValue()), BooleanClause.Occur.SHOULD);
        return query;
    }

    /**
     * Builds a full text query restricted to one field.
     * <p>
     * For a key field, the result combines a {@link PrefixQuery} on the raw value with the parsed query on the
     * {@link ToLowerCaseFieldBridge#ID_POSTFIX} variant of the field (both optional). For any other field, the
     * parsed query is returned as is.
     * </p>
     *
     * @param fieldFullText The field full text condition to translate.
     * @return The Lucene query for the field.
     * @throws UnsupportedFullTextQueryException If Lucene can't parse the generated query.
     */
    @Override
    public Query visit(FieldFullText fieldFullText) {
        String fieldName = fieldFullText.getField().getFieldMetadata().getName();
        String[] fieldsAsArray = new String[] { fieldName };
        String fullTextValue = getFullTextValue(fieldFullText);
        String fullTextQuery = fieldName + ':' + fullTextValue;
        if (fieldFullText.getField().getFieldMetadata().isKey()) {
            BooleanQuery query = new BooleanQuery();
            query.add(new PrefixQuery(new Term(fieldName, fieldFullText.getValue())), BooleanClause.Occur.SHOULD);
            fieldsAsArray = new String[] { fieldName + ToLowerCaseFieldBridge.ID_POSTFIX };
            fullTextQuery = fieldName + ToLowerCaseFieldBridge.ID_POSTFIX + ":" + fullTextValue; //$NON-NLS-1$
            query.add(parseQuery(fieldsAsArray, fullTextQuery, fieldFullText.getValue()),
                    BooleanClause.Occur.SHOULD);
            return query;
        }
        return parseQuery(fieldsAsArray, fullTextQuery, fieldFullText.getValue());
    }

    /**
     * Parses a generated query string with a {@link MultiFieldQueryParser} using a {@link StandardAnalyzer}.
     *
     * @param fieldsAsArray The default fields for terms of the query that don't name a field.
     * @param fullTextQuery The Lucene query string to parse.
     * @param keywords The keywords as typed by the user (only used in the error message).
     * @return The parsed query.
     * @throws UnsupportedFullTextQueryException If the query string is rejected by the parser.
     * @throws RuntimeException If parsing fails for any other reason.
     */
    private Query parseQuery(String[] fieldsAsArray, String fullTextQuery, String keywords) {
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldsAsArray, new StandardAnalyzer());
        // Very important! Lucene does an implicit lower case for "expanded terms" (which is something used).
        parser.setLowercaseExpandedTerms(true);
        try {
            return parser.parse(fullTextQuery);
        } catch (org.apache.lucene.queryparser.classic.ParseException e) {
            throw new UnsupportedFullTextQueryException("'" + keywords + "' is unsupported keywords", e); //$NON-NLS-1$ //$NON-NLS-2$
        } catch (Exception e) {
            throw new RuntimeException("Invalid generated Lucene query", e); //$NON-NLS-1$
        }
    }

    /**
     * @param fullText A full text condition.
     * @return The lower cased and trimmed value of the condition, prepared by {@link #getSearchTextValue(String)}.
     */
    private static String getFullTextValue(FullText fullText) {
        return getSearchTextValue(fullText.getValue().toLowerCase().trim());
    }

    /**
     * Prepares a user value so it can be embedded in a Lucene query string:
     * <ol>
     * <li>leading '*' characters are dropped,</li>
     * <li>reserved characters are replaced with a blank,</li>
     * <li>a value enclosed in single quotes becomes a double quoted phrase,</li>
     * <li>otherwise every keyword gets a trailing '*' (if it doesn't already end with one).</li>
     * </ol>
     * An empty value, or a value only made of '*' characters, yields <code>"*"</code>; it is up to the query
     * parser to accept or reject it.
     *
     * @param value The (non null) value to prepare.
     * @return The text to put in the Lucene query string.
     */
    private static String getSearchTextValue(String value) {
        int index = 0;
        // Skip '*' characters at beginning. The bounds check prevents reading past the end of an empty value or
        // of a value only made of '*' characters.
        while (index < value.length() && value.charAt(index) == '*') {
            index++;
        }
        if (index > 0) {
            value = value.substring(index);
        }
        for (char reserved : RESERVED_CHARACTERS) {
            value = value.replace(reserved, ' ');
        }
        if (value.length() > 1 && value.startsWith("'") && value.endsWith("'")) { //$NON-NLS-1$//$NON-NLS-2$
            return "\"" + value.substring(1, value.length() - 1) + "\""; //$NON-NLS-1$ //$NON-NLS-2$
        }
        if (value.contains(" ")) { //$NON-NLS-1$
            return getMultiKeywords(value);
        }
        return value.endsWith("*") ? value : value + '*'; //$NON-NLS-1$
    }

    /**
     * @param value Keywords separated by blanks.
     * @return All non empty keywords, each followed by '*' (unless it already ends with one) and by a blank.
     */
    private static String getMultiKeywords(String value) {
        StringBuilder keywords = new StringBuilder();
        for (String block : value.split(" ")) { //$NON-NLS-1$
            if (StringUtils.isNotEmpty(block)) {
                keywords.append(block);
                if (!block.endsWith("*")) { //$NON-NLS-1$
                    keywords.append('*');
                }
                keywords.append(' ');
            }
        }
        return keywords.toString();
    }
}