influent.server.data.PropertyMatchBuilder.java Source code

Java tutorial

Introduction

Here is the source code for influent.server.data.PropertyMatchBuilder.java

Source

/*
 * Copyright (C) 2013-2015 Uncharted Software Inc.
 *
 * Property of Uncharted(TM), formerly Oculus Info Inc.
 * http://uncharted.software/
 *
 * Released under the MIT License.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package influent.server.data;

import influent.idl.FL_BoundedRange;
import influent.idl.FL_ClusteringDataAccess;
import influent.idl.FL_Constraint;
import influent.idl.FL_ListRange;
import influent.idl.FL_OrderBy;
import influent.idl.FL_PropertyDescriptor;
import influent.idl.FL_PropertyDescriptors;
import influent.idl.FL_PropertyMatchDescriptor;
import influent.idl.FL_PropertyType;
import influent.idl.FL_RequiredPropertyKey;
import influent.idl.FL_ReservedPropertyKey;
import influent.idl.FL_SearchableBy;
import influent.idl.FL_SingletonRange;
import influent.idl.FL_TypeMapping;
import influent.server.utilities.InfluentId;

import org.apache.avro.AvroRemoteException;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import org.joda.time.DateTime;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PropertyMatchBuilder {

    // Match descriptors produced by parsing, keyed by type name. Each list holds the
    // terms that were built for that type.
    protected Map<String, List<FL_PropertyMatchDescriptor>> _descriptorMap = new HashMap<String, List<FL_PropertyMatchDescriptor>>();

    // Value of the reserved MATCH tag, if one was present in the query. A null value or
    // "any" (case-insensitive) selects the optional constraints in _parseTerm.
    private String _matchType = null;
    // Values of the reserved TYPE tags, if any. When non-null, _parseTerm skips type
    // mappings whose type is not in this list.
    private List<String> _dataTypes = null;
    // Ordering requested through reserved ORDER tags; stays null when no orderable tag was seen.
    private List<FL_OrderBy> _orderBy = null;
    // Used by processIds to expand ids into their leaf ids. Only assigned by the
    // query-string constructors, so it is null for the JSON constructor.
    private FL_ClusteringDataAccess _clusterDataAccess;

    // When true, processIds expands leaf ids and does not filter ids by their id type.
    private boolean _isLinkSearch = false;
    // When true, processIds substitutes typed ids instead of native ids.
    private boolean _isMultiType = false;

    // Leading free text: anchored at the start of input, group 1 is the run of non-colon
    // characters that precedes either the end of input or the first " name:" term.
    static final Pattern freeTextRegEx = Pattern.compile("\\A([^:]*)(\\s*$| [^:\\s]+:.*)");
    // name:value terms: group 1 is the name (no colon or whitespace), group 2 is the value,
    // which is either double-quoted or a run of non-colon characters; the term must be
    // followed by a space or the end of input.
    static final Pattern termRegEx = Pattern.compile("([^:\\s]+):(\"([^\"]*)\"|[^:]*)( |$)");
    // Trailing boost suffix: '^' followed by digits and/or dots (group 1) at the end of the value.
    static final Pattern boostPattern = Pattern.compile("\\^([\\.0-9]+)$");
    // Quoted text delimited by double quotes that are not preceded by a backslash; group 2 is the content.
    static final Pattern quotePattern = Pattern.compile("((?<![\\\\])\")((?:.(?!(?<![\\\\])\\1))*.?)\\1");
    // Bracketed range of the form [start TO end] (or lower-case "to"); groups 1 and 2 are the bounds.
    static final Pattern rangePattern = Pattern.compile("\\[(.*)?\\s(?:TO|to)\\s(.*)?\\]");
    // Trailing similarity suffix: '~' optionally followed by digits and/or dots (group 1).
    static final Pattern similarityPattern = Pattern.compile("~([\\.0-9]+)?$");

    // Formats date range bounds with second precision; the trailing 'Z' is a quoted literal in the pattern.
    static final DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'");

    /**
     * Parses a textual query without cluster data access.
     *
     * Delegates to the five-argument constructor, passing {@code null} for the
     * clustering data access.
     *
     * @param query        the query string to parse
     * @param descriptors  the property descriptors that terms are matched against
     * @param isLinkSearch stored and consulted when id values are processed
     * @param isMultiType  stored and consulted when id values are processed
     */
    public PropertyMatchBuilder(String query, FL_PropertyDescriptors descriptors, boolean isLinkSearch,
            boolean isMultiType) {
        this(query, descriptors, null, isLinkSearch, isMultiType);
    }

    /**
     * Parses a textual query into property match descriptors.
     *
     * The query is processed in three passes: reserved tags (handled by
     * {@code _parseSpecialTags}), then every {@code name:value} term whose name was not
     * consumed as a reserved tag, and finally the leading free text.
     *
     * @param query             the query string to parse; must not be null
     * @param descriptors       the property descriptors that terms are matched against
     * @param clusterDataAccess used to expand ids into leaf ids; may be null
     * @param isLinkSearch      stored and consulted when id values are processed
     * @param isMultiType       stored and consulted when id values are processed
     */
    public PropertyMatchBuilder(String query, FL_PropertyDescriptors descriptors,
            FL_ClusteringDataAccess clusterDataAccess, boolean isLinkSearch, boolean isMultiType) {

        _clusterDataAccess = clusterDataAccess;
        _isMultiType = isMultiType;
        _isLinkSearch = isLinkSearch;

        final String trimmedQuery = query.trim();

        // Pass 1: reserved tags. The returned set names the tags that were consumed.
        final Set<String> reservedTags = _parseSpecialTags(termRegEx, trimmedQuery, descriptors);

        // Pass 2: ordinary name:value terms, skipping anything consumed above.
        for (Matcher terms = termRegEx.matcher(trimmedQuery); terms.find();) {
            final String name = terms.group(1).trim();
            if (!reservedTags.contains(name)) {
                _parseTerm(name, terms.group(2).trim(), descriptors);
            }
        }

        // Pass 3: free text, matched against the query exactly as it was passed in.
        final Matcher freeText = freeTextRegEx.matcher(query);
        while (freeText.find()) {
            _parseTerm(null, freeText.group(1).trim(), descriptors);
        }
    }

    /**
     * Builds property match descriptors from their JSON representation.
     *
     * {@code pmdMap} maps a type name to an array of term objects. A term is skipped
     * unless it has both a "key" and a "range". When "typeMappings" is present, the
     * range's "value" is read as a single string, id-valued keys (FROM, TO, ENTITY, ID,
     * LINKED) are passed through {@code processIds}, and the result becomes a singleton
     * or list range. "constraint", "variable", "weight", "similarity" and "include" are
     * optional and fall back to OPTIONAL_EQUALS, "", 1, 1 and true respectively.
     *
     * @param pmdMap       JSON object keyed by type name; an empty object yields an empty descriptor map
     * @param isLinkSearch stored and consulted when id values are processed
     * @param isMultiType  stored and consulted when id values are processed
     * @throws JSONException if a required member is missing or has the wrong JSON type
     */
    public PropertyMatchBuilder(JSONObject pmdMap, boolean isLinkSearch, boolean isMultiType) throws JSONException {

        _isLinkSearch = isLinkSearch;
        _isMultiType = isMultiType;

        // JSONObject.getNames returns null (not an empty array) for an empty object.
        String[] types = JSONObject.getNames(pmdMap);
        if (types == null) {
            return;
        }

        for (String type : types) {
            JSONArray pmds = pmdMap.getJSONArray(type);
            List<FL_PropertyMatchDescriptor> termList = new ArrayList<FL_PropertyMatchDescriptor>();
            for (int i = 0; i < pmds.length(); i++) {
                // Start a term builder
                FL_PropertyMatchDescriptor.Builder termBuilder = FL_PropertyMatchDescriptor.newBuilder();
                JSONObject jObj = pmds.getJSONObject(i);
                if (!jObj.has("key") || !jObj.has("range")) {
                    continue;
                }

                String key = jObj.getString("key");
                termBuilder.setKey(key);

                // NOTE(review): the range is only populated when "typeMappings" is present;
                // confirm whether terms without type mappings are expected here.
                if (jObj.has("typeMappings")) {
                    JSONArray jsonTypeMaps = jObj.getJSONArray("typeMappings");
                    //TODO handle list range
                    JSONObject rangeObj = jObj.getJSONObject("range");

                    List<FL_TypeMapping> typeMaps = new ArrayList<FL_TypeMapping>();
                    //We should only have one type map here
                    for (int j = 0; j < jsonTypeMaps.length(); j++) {
                        JSONObject typeMap = jsonTypeMaps.getJSONObject(j);

                        FL_TypeMapping.Builder typeMapBuilder = FL_TypeMapping.newBuilder();
                        typeMapBuilder.setMemberKey(typeMap.getString("memberKey"));
                        typeMapBuilder.setType(typeMap.getString("type"));
                        typeMaps.add(typeMapBuilder.build());
                    }

                    if (typeMaps.isEmpty()) {
                        continue;
                    }

                    termBuilder.setTypeMappings(typeMaps);

                    // Mutable copy: processIds adds to and removes from this list.
                    List<Object> values = new ArrayList<Object>(Arrays.asList(rangeObj.getString("value")));

                    if (key.equals(FL_RequiredPropertyKey.FROM.name())
                            || key.equals(FL_RequiredPropertyKey.TO.name())
                            || key.equals(FL_RequiredPropertyKey.ENTITY.name())
                            || key.equals(FL_RequiredPropertyKey.ID.name())
                            || key.equals(FL_RequiredPropertyKey.LINKED.name())) {

                        values = processIds(values, typeMaps.get(0));

                    }

                    if (values.isEmpty()) {
                        // Stripped out all the values? Throw it out
                        continue;
                    } else if (values.size() == 1) {
                        // Singletons
                        termBuilder.setRange(FL_SingletonRange.newBuilder()
                                .setType(FL_PropertyType.valueOf(rangeObj.getString("type")))
                                .setValue(values.get(0)).build());
                    } else {
                        // Lists
                        termBuilder.setRange(FL_ListRange.newBuilder()
                                .setType(FL_PropertyType.valueOf(rangeObj.getString("type")))
                                .setValues(new ArrayList<Object>(values)).build());
                    }
                }

                //Populate the rest of the term, using default values if needed
                if (jObj.has("constraint")) {
                    termBuilder.setConstraint(FL_Constraint.valueOf(jObj.getString("constraint")));
                } else {
                    termBuilder.setConstraint(FL_Constraint.OPTIONAL_EQUALS);
                }

                if (jObj.has("variable")) {
                    termBuilder.setVariable(jObj.getString("variable"));
                } else {
                    termBuilder.setVariable("");
                }

                // Weight and similarity are floating point, so read them with getDouble;
                // getInt would truncate fractional values such as 0.5 to 0.
                if (jObj.has("weight")) {
                    termBuilder.setWeight((float) jObj.getDouble("weight"));
                } else {
                    termBuilder.setWeight(1.0f);
                }

                // The similarity value belongs in the similarity field; writing it to the
                // weight would clobber the weight and leave similarity unset.
                if (jObj.has("similarity")) {
                    termBuilder.setSimilarity((float) jObj.getDouble("similarity"));
                } else {
                    termBuilder.setSimilarity(1.0f);
                }

                if (jObj.has("include")) {
                    termBuilder.setInclude(jObj.getBoolean("include"));
                } else {
                    termBuilder.setInclude(true);
                }

                termList.add(termBuilder.build());
            }
            //Add descriptor to map
            _descriptorMap.put(type, termList);
        }
    }

    /**
     * Scans {@code term} for the reserved TYPE, MATCH and ORDER tags and records their
     * values on this builder.
     *
     * TYPE values are appended to the data type list, MATCH replaces the match type, and
     * ORDER appends an ordering when the named key is orderable. A trailing '^' on an
     * ORDER value requests ascending order. Double quotes are stripped from all values.
     *
     * @param regex       pattern whose group 1 is the tag name and group 2 the tag value
     * @param term        the text to scan; it is trimmed before matching
     * @param descriptors property descriptors consulted to decide whether a key is sortable
     * @return the names of the reserved tags that were consumed; an ORDER tag is only
     *         reported when its key turned out to be orderable
     */
    private Set<String> _parseSpecialTags(Pattern regex, String term, FL_PropertyDescriptors descriptors) {
        final String typeTag = FL_ReservedPropertyKey.TYPE.name();
        final String matchTag = FL_ReservedPropertyKey.MATCH.name();
        final String orderTag = FL_ReservedPropertyKey.ORDER.name();

        final Set<String> consumed = new HashSet<String>();
        final Matcher tags = regex.matcher(term.trim());

        while (tags.find()) {
            final String tagName = tags.group(1).trim();

            if (tagName.equals(typeTag)) {
                if (_dataTypes == null) {
                    _dataTypes = new ArrayList<String>();
                }
                _dataTypes.add(tags.group(2).trim().replace("\"", "").trim());
                consumed.add(typeTag);

            } else if (tagName.equals(matchTag)) {
                _matchType = tags.group(2).trim().replace("\"", "").trim();
                consumed.add(matchTag);

            } else if (tagName.equals(orderTag)) {
                String orderKey = tags.group(2).trim();

                // A trailing caret asks for ascending order and is not part of the key.
                final boolean ascending = orderKey.endsWith("^");
                if (ascending) {
                    orderKey = orderKey.substring(0, orderKey.length() - 1);
                }
                orderKey = orderKey.replace("\"", "").trim();

                if (_isOrderable(orderKey, descriptors)) {
                    if (_orderBy == null) {
                        _orderBy = new ArrayList<FL_OrderBy>();
                    }
                    _orderBy.add(FL_OrderBy.newBuilder().setAscending(ascending).setPropertyKey(orderKey).build());
                    consumed.add(orderTag);
                }
            }
        }

        return consumed;
    }

    /**
     * Tells whether results may be ordered by {@code key}: either the key is the reserved
     * MATCH key, or a property descriptor with that key reports itself sortable. When
     * several descriptors share the key, the last one decides.
     */
    private boolean _isOrderable(String key, FL_PropertyDescriptors descriptors) {
        if (FL_ReservedPropertyKey.MATCH.name().equals(key)) {
            return true;
        }

        boolean sortable = false;
        for (FL_PropertyDescriptor pd : descriptors.getProperties()) {
            if (pd.getKey().equals(key)) {
                sortable = pd.getSortable();
            }
        }
        return sortable;
    }

    /**
     * Checks that the values carried by {@code range} are usable for the property's type.
     *
     * Only LONG properties are validated: every value must be a {@code String} that
     * parses as a {@code long}. All other property types accept any value.
     *
     * Singleton and list ranges contribute their value(s). A bounded range contributes
     * its start and end, except for bounds that are null or "*", which the date handling
     * in {@code _parseTerm} also treats as a wildcard. Any other range object contributes
     * nothing and therefore passes.
     *
     * @param pd    descriptor of the property the values are destined for
     * @param range an FL_SingletonRange, FL_ListRange or FL_BoundedRange
     * @return {@code false} if a value cannot be used for a LONG property, {@code true} otherwise
     */
    private boolean _typeCheckPropertyValue(FL_PropertyDescriptor pd, Object range) {

        if (pd.getPropertyType() != FL_PropertyType.LONG) {
            return true;
        }

        final List<Object> values = new ArrayList<Object>();
        if (range instanceof FL_SingletonRange) {
            values.add(((FL_SingletonRange) range).getValue());
        } else if (range instanceof FL_ListRange) {
            values.addAll(((FL_ListRange) range).getValues());
        } else if (range instanceof FL_BoundedRange) {
            // Bounded ranges used to fall through with no value collection at all,
            // which ended in a NullPointerException.
            final FL_BoundedRange bounded = (FL_BoundedRange) range;
            for (Object bound : new Object[] { bounded.getStart(), bounded.getEnd() }) {
                if (bound != null && !"*".equals(bound)) {
                    values.add(bound);
                }
            }
        }

        for (Object value : values) {
            if (!(value instanceof String)) {
                return false;
            }
            try {
                // Parse as long, not int: the property type is LONG, so values beyond
                // the int range are legitimate.
                Long.parseLong((String) value);
            } catch (NumberFormatException notALong) {
                return false;
            }
        }

        return true;
    }

    /**
     * Rewrites a list of Influent ids in place into the ids that should be searched.
     *
     * For link searches with cluster data access available, each id initially in the
     * list is first expanded with its leaf ids (the id itself is not added again).
     * Every id is then replaced by its typed id (multi-type searches) or its native id.
     * For non-link searches, ids whose id type is non-null and differs from the type
     * mapping's type are removed.
     *
     * @param values      mutable list of id strings; modified in place
     * @param typeMapping the type mapping the ids are being searched under
     * @return the same {@code values} list
     */
    private List<Object> processIds(List<Object> values, FL_TypeMapping typeMapping) {

        if (_isLinkSearch && _clusterDataAccess != null) {
            // Walk a snapshot so that only the ids present on entry are expanded; the
            // leaf ids appended below are not expanded again.
            for (Object original : new ArrayList<Object>(values)) {
                final String uid = (String) original;
                try {
                    for (String leafId : _clusterDataAccess.getLeafIds(Collections.singletonList(uid), null, true)) {
                        if (!leafId.equals(uid)) {
                            values.add(leafId);
                        }
                    }
                } catch (AvroRemoteException e) {
                    e.printStackTrace();
                }
            }
        }

        final ListIterator<Object> cursor = values.listIterator();
        while (cursor.hasNext()) {
            final InfluentId id = InfluentId.fromInfluentId((String) cursor.next());

            // Link searches do not filter on the id type at all.
            final String idType = _isLinkSearch ? null : id.getIdType();

            final boolean searchable = idType == null || idType.equals(typeMapping.getType());
            if (!searchable) {
                cursor.remove();
                continue;
            }

            cursor.set(_isMultiType ? id.getTypedId() : id.getNativeId());
        }

        return values;
    }

    /**
     * Parses one query term and appends the resulting match descriptors to the
     * descriptor map, one per matching property and type mapping.
     *
     * Recognised value syntax:
     * <ul>
     * <li>a leading '-' on the term name marks the term as excluded;</li>
     * <li>a trailing {@code ^n} sets the weight;</li>
     * <li>a trailing {@code ~} or {@code ~n} requests fuzzy matching (similarity 0.5 when
     *     no number is given);</li>
     * <li>{@code "..."} is taken as a quoted value;</li>
     * <li>{@code [a TO b]} is taken as a bounded range.</li>
     * </ul>
     * Modifier suffixes are stripped from the value before it is used.
     *
     * DATE properties always use the exact (non-fuzzy) constraint. A single date becomes
     * a range covering that day; an explicit range runs from the start of the start date
     * to just before the end date, with "*" bounds passed through untouched.
     *
     * @param termName    the property key, or {@code null} for free text, which is matched
     *                    against every free-text searchable property
     * @param termValue   the raw value; null or empty values are ignored
     * @param descriptors the property descriptors to match the term against
     * @throws IllegalArgumentException if a named term targets a DATE property and its
     *                                  value cannot be parsed as a date
     */
    private void _parseTerm(String termName, String termValue, FL_PropertyDescriptors descriptors) {

        if (termValue == null || termValue.isEmpty()) {
            return;
        }

        final boolean isFreeText = termName == null;
        boolean isQuoted = false;
        boolean isFuzzy = false;
        boolean isNegation = false;

        // Start a term builder. It is reused for every descriptor built below, so each
        // loop iteration sets key, constraint, range and type mappings explicitly.
        FL_PropertyMatchDescriptor.Builder termBuilder = FL_PropertyMatchDescriptor.newBuilder();

        // Negation
        if (!isFreeText && termName.startsWith("-")) {
            termName = termName.substring(1);
            termBuilder.setInclude(false);
            isNegation = true;
        }

        // Term Boosting
        Matcher boostMatch = boostPattern.matcher(termValue);
        if (boostMatch.find()) {
            try {
                Float weight = Float.valueOf(boostMatch.group(1));
                termValue = termValue.substring(0, boostMatch.start());
                termBuilder.setWeight(weight);
            } catch (NumberFormatException ignored) {
                // Not a usable number (e.g. "1.2.3"): leave the suffix in the value.
            }
        }

        // Fuzzy matching
        Matcher similarityMatch = similarityPattern.matcher(termValue);
        if (similarityMatch.find()) {
            String similarityStr = similarityMatch.group(1);
            try {
                Float similarity = similarityStr == null ? 0.5f : Float.valueOf(similarityStr);
                // Strip the "~n" suffix and keep the text in front of it. Assigning
                // group(1) here would replace the value with the similarity number,
                // or with null for a bare "~".
                termValue = termValue.substring(0, similarityMatch.start());
                termBuilder.setSimilarity(similarity);
                isFuzzy = true;
            } catch (NumberFormatException ignored) {
                // Not a usable number: leave the suffix in the value.
            }
        }

        // Nothing left once the modifiers are removed (e.g. a bare "~" or "^2").
        if (termValue.isEmpty()) {
            return;
        }

        // Quotes and ranges
        Matcher quoteMatch = quotePattern.matcher(termValue);
        Matcher rangeMatch = rangePattern.matcher(termValue);

        if (quoteMatch.find()) {
            termValue = quoteMatch.group(2);
            isQuoted = true;
        }

        final boolean parsedAsRange = rangeMatch.find();
        final String parsedRangeStart = parsedAsRange ? rangeMatch.group(1) : null;
        final String parsedRangeEnd = parsedAsRange ? rangeMatch.group(2) : null;

        // Constraints
        final boolean matchAny = _matchType == null || _matchType.equalsIgnoreCase("any");
        final FL_Constraint exactConstraint = matchAny ? FL_Constraint.OPTIONAL_EQUALS
                : FL_Constraint.REQUIRED_EQUALS;
        final FL_Constraint fuzzyConstraint = matchAny ? FL_Constraint.FUZZY_PARTIAL_OPTIONAL
                : FL_Constraint.FUZZY_REQUIRED;
        final FL_Constraint defaultConstraint = (!isFuzzy
                && (isQuoted || isFreeText || parsedAsRange || isNegation)) ? exactConstraint : fuzzyConstraint;

        // Match properties to Search Descriptors
        for (FL_PropertyDescriptor pd : descriptors.getProperties()) {

            if (!isFreeText && !pd.getKey().equals(termName)) {
                continue;
            }

            // If the term is freetext, but this isn't a freetext searchable property, skip it.
            if (isFreeText && !FL_SearchableBy.FREE_TEXT.equals(pd.getSearchableBy())) {
                continue;
            }

            final boolean isDate = pd.getPropertyType() == FL_PropertyType.DATE;

            for (FL_TypeMapping td : pd.getMemberOf()) {

                // If the term doesn't match the given datatypes, then continue;
                if (_dataTypes != null && !_dataTypes.contains(td.getType())) {
                    continue;
                }

                // Work on per-mapping copies. The date handling below rewrites the range,
                // and carrying that over to the next mapping or property would re-shift
                // the end bound and hand date ranges to non-date properties.
                boolean isRange = parsedAsRange;
                String rangeStart = parsedRangeStart;
                String rangeEnd = parsedRangeEnd;

                termBuilder.setKey(pd.getKey());

                // Dates become unfuzzied; everything else uses the term's own constraint.
                termBuilder.setConstraint(isDate ? exactConstraint : defaultConstraint);

                // Special handling for dates
                if (isDate) {
                    try {
                        if (!isRange) {
                            // Singleton dates become ranges across the whole 24h day
                            DateTime date = new DateTime(termValue);
                            rangeStart = dateFormatter.print(date.withTime(0, 0, 0, 0));
                            rangeEnd = dateFormatter.print(date.withTime(23, 59, 59, 999));
                            isRange = true;
                        } else {
                            if (!rangeStart.equals("*")) {
                                DateTime startDate = new DateTime(rangeStart);
                                rangeStart = dateFormatter.print(startDate.withTime(0, 0, 0, 0)); // From 0h on start date..
                            }

                            if (!rangeEnd.equals("*")) {
                                DateTime endDate = new DateTime(rangeEnd);
                                rangeEnd = dateFormatter.print(endDate.minusMillis(3)); // .. to 23:59:59.997 exclusive of the end date (max SQL millisecond precision)
                            }
                        }
                    } catch (IllegalArgumentException e) {
                        if (isFreeText) {
                            // Free text that is not a date simply does not apply to this
                            // property; do not abort the whole query over it.
                            continue;
                        }
                        throw e;
                    }
                }

                // Set the term value in the builder
                if (isRange) {
                    termBuilder.setRange(FL_BoundedRange.newBuilder().setStart(rangeStart).setEnd(rangeEnd)
                            .setInclusive(true).setType(pd.getPropertyType()).build());
                } else {

                    // freetext is comma/space delimited (as long as it's not quoted). Everything else is comma delimited.
                    List<Object> values = new ArrayList<Object>(Arrays.asList(
                            termValue.split(isFreeText && !isQuoted ? "(?<!\\\\)( |,)" : "(?<!\\\\),")));

                    if (pd.getKey().equals(FL_RequiredPropertyKey.FROM.name())
                            || pd.getKey().equals(FL_RequiredPropertyKey.TO.name())
                            || pd.getKey().equals(FL_RequiredPropertyKey.ENTITY.name())
                            || pd.getKey().equals(FL_RequiredPropertyKey.LINKED.name())) {

                        values = processIds(values, td);

                    }

                    if (values.isEmpty()) {
                        // Stripped out all the values? Throw it out
                        continue;
                    } else if (values.size() == 1) {
                        // Singletons
                        termBuilder.setRange(FL_SingletonRange.newBuilder().setType(pd.getPropertyType())
                                .setValue(values.get(0)).build());
                    } else {
                        // Lists
                        termBuilder.setRange(FL_ListRange.newBuilder().setType(pd.getPropertyType())
                                .setValues(new ArrayList<Object>(values)).build());
                    }
                }

                // Freetext, but not the correct type?
                if (isFreeText && !_typeCheckPropertyValue(pd, termBuilder.getRange())) {
                    continue;
                }

                termBuilder.setTypeMappings(Collections.singletonList(td));

                List<FL_PropertyMatchDescriptor> termList = _descriptorMap.get(td.getType());
                if (termList == null) {
                    termList = new ArrayList<FL_PropertyMatchDescriptor>();
                    _descriptorMap.put(td.getType(), termList);
                }

                termList.add(termBuilder.build());
            }
        }

    }

    /**
     * @return the internal map from type name to the match descriptors built for that
     *         type; the map itself is returned, not a copy
     */
    public Map<String, List<FL_PropertyMatchDescriptor>> getDescriptorMap() {
        return _descriptorMap;
    }

    /**
     * @return the orderBy list collected from ORDER tags in the query, or {@code null}
     *         when no orderable ORDER tag was parsed
     */
    public List<FL_OrderBy> getOrderBy() {
        return _orderBy;
    }

}