org.opencb.commons.datastore.solr.FacetQueryParser.java Source code

Java tutorial

Introduction

Here is the source code for org.opencb.commons.datastore.solr.FacetQueryParser.java

Source

/*
 * Copyright 2015-2017 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.commons.datastore.solr;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;

import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FacetQueryParser {

    /** Separates independent top-level facets in the query string handled by {@code parse}. */
    public static final String FACET_SEPARATOR = ";";
    /** Joins the pieces of a generated facet label (field, range bounds, aggregation name, counter). */
    public static final String LABEL_SEPARATOR = "___";
    /** Separates the levels of a nested facet, outermost level first. */
    private static final String NESTED_FACET_SEPARATOR = ">>";
    /** Separates sibling facets that live at the same level. */
    private static final String NESTED_SUBFACET_SEPARATOR = ",";
    /** Separates the values listed between brackets in a categorical facet, e.g. {@code field[a,b]}. */
    public static final String INCLUDE_SEPARATOR = ",";
    /** A facet containing this token (and no aggregation marker) is parsed as a range facet. */
    private static final String RANGE_IDENTIFIER = "..";
    /** A facet containing this token is treated as an aggregation function call. */
    private static final String AGGREGATION_IDENTIFIER = "(";
    /** Accepted aggregation names; a facet is accepted when it starts with one of them (prefix match). */
    private static final String[] AGGREGATION_FUNCTIONS = { "sum", "avg", "max", "min", "unique", "percentile",
            "sumsq", "variance", "stddev", };
    /**
     * Shape of a categorical facet. Group 1 is the field name (a letter followed by at least one letter,
     * digit, '_' or '.'), group 2 the optional bracketed include list and group 3 the optional
     * {@code :*} or {@code :<digits>} limit suffix.
     */
    public static final Pattern CATEGORICAL_PATTERN = Pattern
            .compile("^([a-zA-Z][a-zA-Z0-9_.]+)(\\[[a-zA-Z0-9,*]+])?(:\\*|:\\d+)?$");

    /** Limit applied to a terms facet when the facet string does not carry its own limit. */
    private static final int DEFAULT_FACET_LIMIT = 50;

    /**
     * Running counter appended to every generated label. It is incremented on each label and is not
     * reset by any code in this class, so it keeps growing across successive {@code parse} calls.
     */
    private int count;

    /** Creates a parser whose label counter starts at zero. */
    public FacetQueryParser() {
        count = 0;
    }

    /**
     * Translates the compact facet declaration syntax into a JSON facet query string.
     * <p>
     * The query is cut into top-level facets on {@code ;}. A facet containing {@code >>} is handled as a
     * nested facet; any other facet is split into its comma-separated siblings, each parsed on its own.
     * The resulting map is serialized and then post-processed by {@link #parseJson(String)}.
     *
     * @param query A string with the format: chrom>>type,biotype,avg>>gerp,avg;type;biotype>>sadasd
     * @return A JSON string facet query, or an empty string when {@code query} is null or empty
     * @throws Exception Any exception related with facet parsing or JSON conversion
     */
    public String parse(String query) throws Exception {
        if (StringUtils.isEmpty(query)) {
            return "";
        }

        Map<String, Object> facets = new LinkedHashMap<>();
        for (String declaration : query.split(FACET_SEPARATOR)) {
            if (!declaration.contains(NESTED_FACET_SEPARATOR)) {
                // Flat declaration: every comma-separated sibling becomes its own top-level facet.
                for (String sibling : getSubFacets(declaration)) {
                    parseSimpleFacet(sibling, facets);
                }
                continue;
            }
            parseNestedFacet(declaration, facets);
        }

        String rawJson = new ObjectMapper().writeValueAsString(facets);
        return parseJson(rawJson);
    }

    /**
     * Parses one non-nested facet and stores it in {@code jsonFacetMap} under a freshly generated label.
     *
     * @param facet        facet string (aggregation, range or categorical)
     * @param jsonFacetMap map receiving the label and the facet definition
     * @throws Exception if the facet cannot be parsed
     */
    private void parseSimpleFacet(String facet, Map<String, Object> jsonFacetMap) throws Exception {
        Map<String, Object> parsed = parseFacet(facet);
        if (parsed != null) {
            // Categorical or range: the definition is a map.
            jsonFacetMap.put(getLabel(parsed), parsed);
            return;
        }

        // Aggregation: parseFacet signals it with null and the definition is the function call itself.
        String aggregationLabel = getLabelFromAggregation(facet);
        String definition = facet;
        if (facet.startsWith("percentile")) {
            // Percentile calls get a fixed list of percentiles appended before each closing parenthesis.
            definition = facet.replace(")", ",1,10,25,50,75,90,99)");
        }
        jsonFacetMap.put(aggregationLabel, definition);
    }

    /**
     * Builds the label for a categorical or range facet and advances the label counter.
     * <p>
     * The label is the facet field (or "noname" when there is none), followed for range facets by
     * start, end and step, and finally by the counter value; all parts are joined with the label separator.
     *
     * @param facetMap parsed facet definition
     * @return the generated label
     */
    private String getLabel(Map<String, Object> facetMap) {
        StringBuilder label = new StringBuilder();
        if (facetMap.containsKey("field")) {
            label.append(facetMap.get("field").toString());
        } else {
            label.append("noname");
        }

        // Only range definitions carry a "step" entry.
        if (facetMap.containsKey("step")) {
            for (String key : new String[] {"start", "end", "step"}) {
                label.append(LABEL_SEPARATOR).append(facetMap.get(key));
            }
        }

        label.append(LABEL_SEPARATOR).append(count++);
        return label.toString();
    }

    /**
     * Builds the label for an aggregation facet written as {@code function(field)} and advances the
     * label counter. The label is the field, the function name and the counter value joined with the
     * label separator.
     *
     * @param facet aggregation facet string, e.g. {@code avg(gerp)}
     * @return the generated label
     * @throws IllegalArgumentException if the facet has no '(' or no ')' after it; without this check
     *                                  the substring calls fail with an uninformative index error
     */
    private String getLabelFromAggregation(String facet) {
        int open = facet.indexOf('(');
        // Search for the closing parenthesis only after the opening one.
        int close = open < 0 ? -1 : facet.indexOf(')', open + 1);
        if (close < 0) {
            throw new IllegalArgumentException(
                    "Invalid aggregation facet, expected 'function(field)': " + facet);
        }

        String aggregationName = facet.substring(0, open);
        String fieldName = facet.substring(open + 1, close);
        return fieldName + LABEL_SEPARATOR + aggregationName + LABEL_SEPARATOR + (count++);
    }

    /**
     * Parse a facet and return the map containing the facet fields.
     * <p>
     * Three kinds of facet strings are recognised, checked in this order:
     * <ul>
     *   <li>aggregation: contains '(' and starts with one of the accepted function names;</li>
     *   <li>range: contains "..", written {@code field[start..end]:step}, or, when the string holds more
     *       than one ':', {@code field[start..end]:step:name:filter} or
     *       {@code field[start..end]:step:name___nestedField:filter}, which produces a query facet
     *       wrapping the range;</li>
     *   <li>categorical: anything else, which must match {@code CATEGORICAL_PATTERN}.</li>
     * </ul>
     *
     * @param facet facet string
     * @return the map containing the facet fields, or {@code null} when the facet is an aggregation
     * @throws Exception             if the aggregation function is unknown, a range facet lacks one of
     *                               its parts, or a categorical facet does not match the pattern
     * @throws NumberFormatException if a range bound or step is not a number
     */
    private Map<String, Object> parseFacet(String facet) throws Exception {
        if (facet.contains(AGGREGATION_IDENTIFIER)) {
            // Validate function (prefix match against the accepted names)
            for (String function : AGGREGATION_FUNCTIONS) {
                if (facet.startsWith(function)) {
                    return null;
                }
            }
            throw new Exception("Invalid aggregation function: " + facet);
        }

        if (facet.contains(RANGE_IDENTIFIER)) {
            // More than one ':' marks the special case: facet of type "query" with a nested "range"
            if (StringUtils.countMatches(facet, ':') > 1) {
                return parseQueryRangeFacet(facet);
            }
            return parseRangeFacet(facet);
        }

        return parseCategoricalFacet(facet);
    }

    /** Splits {@code field[start..end]} (optionally followed by {@code :step}) into its parts. */
    private static String[] splitRangeExpression(String expression) {
        return expression.replace("[", ":").replace("..", ":").replace("]", "").split(":");
    }

    /** Parses a plain range facet, {@code field[start..end]:step}. */
    private Map<String, Object> parseRangeFacet(String facet) throws Exception {
        String[] parts = splitRangeExpression(facet);
        // field, start, end and step are all mandatory; fail clearly instead of with an index error.
        if (parts.length < 4) {
            throw new Exception("Invalid range facet: " + facet);
        }

        Map<String, Object> outputMap = new HashMap<>();
        outputMap.put("field", parts[0]);
        outputMap.put("start", parseNumber(parts[1]));
        outputMap.put("end", parseNumber(parts[2]));
        outputMap.put("step", parseNumber(parts[3]));
        return outputMap;
    }

    /** Parses a query facet wrapping a range, {@code field[start..end]:step:name[___nested]:filter}. */
    private Map<String, Object> parseQueryRangeFacet(String facet) throws Exception {
        String[] split = facet.split(":");
        // Needs the range, the step, the query name and at least one filter token.
        if (split.length < 4) {
            throw new Exception("Invalid range facet: " + facet);
        }
        String[] range = splitRangeExpression(split[0]);
        if (range.length < 3) {
            throw new Exception("Invalid range facet: " + facet);
        }

        // The filter may itself contain ':' (e.g. field:value), so glue the remaining tokens back.
        String filter = String.join(":", Arrays.asList(split).subList(3, split.length));

        Map<String, Object> rangeMap = new HashMap<>();
        rangeMap.put("field", range[0]);
        rangeMap.put("type", "range");
        rangeMap.put("start", parseNumber(range[1]));
        rangeMap.put("end", parseNumber(range[2]));
        rangeMap.put("step", parseNumber(split[1]));

        Map<String, Object> rangeHolder = new HashMap<>();
        rangeHolder.put(range[0], rangeMap);

        String[] nested = split[2].split(LABEL_SEPARATOR);
        Map<String, Object> outputMap = new HashMap<>();
        outputMap.put("field", nested[0]);
        outputMap.put("type", "query");
        outputMap.put("q", filter);
        if (nested.length > 1) {
            // A terms facet on the nested field sits between the query and the range.
            Map<String, Object> nestedMap = new HashMap<>();
            nestedMap.put("field", nested[1]);
            nestedMap.put("type", "terms");
            nestedMap.put("facet", rangeHolder);

            Map<String, Object> nestedHolder = new HashMap<>();
            nestedHolder.put(nested[1], nestedMap);
            outputMap.put("facet", nestedHolder);
        } else {
            outputMap.put("facet", rangeHolder);
        }
        return outputMap;
    }

    /** Parses a categorical facet: {@code field}, {@code field[v1,v2]} or {@code field[prefix*]}, plus optional limit. */
    private Map<String, Object> parseCategoricalFacet(String facet) throws Exception {
        Matcher matcher = CATEGORICAL_PATTERN.matcher(facet);
        if (!matcher.find()) {
            throw new Exception("Invalid categorical facet: " + facet);
        }

        String field = matcher.group(1);
        Map<String, Object> outputMap = new HashMap<>();
        outputMap.put("field", field);

        boolean isQuery = false;
        String include = matcher.group(2);
        if (StringUtils.isNotEmpty(include)) {
            include = include.replace("]", "").replace("[", "");
            if (include.endsWith("*")) {
                // Trailing '*' means "terms starting with"
                outputMap.put("prefix", include.substring(0, include.indexOf("*")));
            } else {
                // Explicit value list: restrict the domain with an OR query over the listed values
                isQuery = true;
                List<String> filters = new ArrayList<>();
                for (String value : include.split(INCLUDE_SEPARATOR)) {
                    filters.add(field + ":" + value);
                }
                outputMap.put("q", StringUtils.join(filters, " OR "));
            }
        }

        if (isQuery) {
            outputMap.put("type", "query");

            // The terms facet is nested under the query, keyed by the field name
            Map<String, Object> termsMap = new HashMap<>();
            termsMap.put("field", field);
            termsMap.put("type", "terms");
            setTermLimit(matcher.group(3), termsMap);

            Map<String, Object> termsHolder = new HashMap<>();
            termsHolder.put(field, termsMap);

            outputMap.put("facet", termsHolder);
        } else {
            outputMap.put("type", "terms");
            setTermLimit(matcher.group(3), outputMap);
        }
        return outputMap;
    }

    /**
     * Stores the bucket limit of a terms facet in {@code map}.
     * <p>
     * A null or empty {@code limit} selects the default limit; a value containing '*' sets
     * {@code allBuckets} to true; otherwise the text after the first character is parsed as the limit.
     *
     * @param limit limit suffix as captured from the facet string (e.g. {@code :10} or {@code :*}), may be null
     * @param map   facet definition to update
     */
    private void setTermLimit(String limit, Map<String, Object> map) {
        if (StringUtils.isEmpty(limit)) {
            map.put("limit", DEFAULT_FACET_LIMIT);
            return;
        }
        if (limit.contains("*")) {
            map.put("allBuckets", true);
            return;
        }
        // Drop the leading separator character before parsing the number.
        map.put("limit", Integer.parseInt(limit.substring(1)));
    }

    /**
     * Parses a nested facet declaration ({@code level1>>level2>>...}) and adds the outermost level,
     * with the deeper levels attached beneath it, to {@code jsonFacet}.
     * <p>
     * Levels are processed from the innermost to the outermost, so that each level can receive the
     * already-built deeper level as its "facet" entry.
     *
     * @param nestedFacet nested facet string
     * @param jsonFacet   map receiving the outermost level
     * @throws Exception if any sub-facet cannot be parsed
     */
    private void parseNestedFacet(String nestedFacet, Map<String, Object> jsonFacet) throws Exception {
        String[] levels = nestedFacet.split(NESTED_FACET_SEPARATOR);

        Map<String, Object> deeperLevel = new HashMap<>();
        int index = levels.length;
        while (index-- > 0) {
            Map<String, Object> currentLevel = new HashMap<>();
            for (String subfacet : getSubFacets(levels[index])) {
                parseSimpleFacet(subfacet, currentLevel);
            }

            if (!deeperLevel.isEmpty()) {
                for (Object candidate : currentLevel.values()) {
                    // Aggregations are plain strings and cannot hold sub-facets.
                    if (!(candidate instanceof Map)) {
                        continue;
                    }
                    Map<String, Object> definition = (Map<String, Object>) candidate;
                    if (!definition.containsKey("facet")) {
                        definition.put("facet", deeperLevel);
                        continue;
                    }
                    // An existing "facet" entry means this is a facet of type "query": the deeper
                    // level goes under each of its inner facets instead.
                    Map<String, Object> innerFacets = (Map<String, Object>) definition.get("facet");
                    for (Object innerCandidate : innerFacets.values()) {
                        if (innerCandidate instanceof Map) {
                            ((Map<String, Object>) innerCandidate).put("facet", deeperLevel);
                        }
                    }
                }
            }

            deeperLevel = currentLevel;
        }

        jsonFacet.putAll(deeperLevel);
    }

    /**
     * Walks a JSON facet definition level by level and completes every facet object found as a value:
     * objects with "start" become ranges (type "range", "step" renamed to "gap"), objects with "q"
     * get type "query", and objects with "field" get type "terms". Nested "facet" objects are
     * visited in turn.
     *
     * @param facetJson JSON facet definition
     * @return the completed definition serialized back to JSON
     * @throws IOException if the JSON cannot be read or written
     */
    public String parseJson(String facetJson) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        Map jsonMap = mapper.readValue(facetJson, Map.class);

        Deque<Map<String, Object>> pending = new ArrayDeque<>();
        pending.add(jsonMap);
        while (!pending.isEmpty()) {
            Map<String, Object> level = pending.remove();
            for (Object candidate : level.values()) {
                if (!(candidate instanceof Map)) {
                    continue;
                }
                Map<String, Object> definition = (Map<String, Object>) candidate;

                // Fill in the type according to the keys present; the first match wins.
                if (definition.containsKey("start")) {
                    definition.put("type", "range");
                    definition.put("gap", definition.get("step"));
                    definition.remove("step");
                } else if (definition.containsKey("q")) {
                    definition.put("type", "query");
                } else if (definition.containsKey("field")) {
                    definition.put("type", "terms");
                }

                // Sub-facets are processed on a later pass.
                Object subFacets = definition.get("facet");
                if (subFacets != null) {
                    pending.add((Map<String, Object>) subFacets);
                }
            }
        }

        return mapper.writeValueAsString(jsonMap);
    }

    /**
     * Converts text to a number, preferring an integral value.
     *
     * @param number text to convert
     * @return a {@code Long} when the text is a valid long, otherwise the {@code Double} it parses to
     * @throws NumberFormatException if the text is neither a long nor a double
     */
    static Number parseNumber(String number) {
        Number parsed;
        try {
            parsed = Long.valueOf(number);
        } catch (NumberFormatException notIntegral) {
            // Not a long: fall back to floating point, which throws if that fails as well.
            parsed = Double.valueOf(number);
        }
        return parsed;
    }

    /**
     * Splits a facet declaration into its comma-separated sibling facets, keeping together the commas
     * that appear inside a bracketed include list, so {@code a,b[x,y],c} yields {@code a}, {@code b[x,y]}
     * and {@code c}.
     * <p>
     * Unbalanced brackets never lose text: a facet whose '[' is never closed is returned as is (so that
     * later validation can reject it with a meaningful message), and a token with a stray ']' is
     * returned on its own.
     *
     * @param query facet declaration without nesting separators
     * @return the sibling facets, in their order of appearance
     */
    static List<String> getSubFacets(String query) {
        List<String> facets = new ArrayList<>();
        // Text of the facet whose '[' has been seen but not yet its ']'; null when no bracket is open.
        StringBuilder pending = null;

        for (String token : query.split(NESTED_SUBFACET_SEPARATOR)) {
            boolean opens = token.contains("[");
            boolean closes = token.contains("]");

            if (opens) {
                // A new bracket starts: whatever was still open can no longer be completed.
                if (pending != null) {
                    facets.add(pending.toString());
                    pending = null;
                }
                if (closes) {
                    facets.add(token);
                } else {
                    pending = new StringBuilder(token);
                }
            } else if (pending != null) {
                // Inside an include list: restore the separator removed by the split.
                pending.append(NESTED_SUBFACET_SEPARATOR).append(token);
                if (closes) {
                    facets.add(pending.toString());
                    pending = null;
                }
            } else {
                facets.add(token);
            }
        }

        // Do not silently drop a facet whose bracket was never closed.
        if (pending != null) {
            facets.add(pending.toString());
        }
        return facets;
    }
}