uk.co.flax.biosolr.builders.ParentNodeFacetTreeBuilder.java Source code

Java tutorial

Introduction

Here is the source code for uk.co.flax.biosolr.builders.ParentNodeFacetTreeBuilder.java

Source

/**
 * Copyright (c) 2015 Lemur Consulting Ltd.
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package uk.co.flax.biosolr.builders;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import uk.co.flax.biosolr.FacetTreeParameters;
import uk.co.flax.biosolr.TreeFacetField;

/**
 * FacetTreeBuilder implementation that uses parent node IDs to build a
 * tree from the bottom node upwards.
 * 
 * <p>
 * Minimum required parameters for this tree builder are the node field,
 * either passed in local parameters or taken from the key value, and 
 * the parent node field. {@link #initialiseParameters(SolrParams)} will
 * throw a SyntaxError if these values are not defined.
 * </p>
 *
 * @author mlp
 */
public class ParentNodeFacetTreeBuilder extends AbstractFacetTreeBuilder {

    private static final Logger LOGGER = LoggerFactory.getLogger(ParentNodeFacetTreeBuilder.class);

    private String parentField;
    private int maxLevels;

    private final Set<String> docFields = new HashSet<>();

    @Override
    public void initialiseParameters(SolrParams localParams) throws SyntaxError {
        // Initialise the common fields
        super.initialiseParameters(localParams);

        // Initialise the parent field - REQUIRED
        parentField = localParams.get(FacetTreeParameters.PARENT_FIELD_PARAM);
        if (StringUtils.isBlank(parentField)) {
            throw new SyntaxError("Missing parent field definition in " + localParams);
        }

        //  Initialise the optional fields
        maxLevels = localParams.getInt(FacetTreeParameters.LEVELS_PARAM, 0);

        docFields.addAll(Arrays.asList(getNodeField(), parentField));
        if (hasLabelField()) {
            docFields.add(getLabelField());
        }
    }

    @Override
    public List<TreeFacetField> processFacetTree(SolrIndexSearcher searcher, Map<String, Integer> facetMap)
            throws IOException {
        checkFieldsInSchema(searcher, docFields);

        // Extract the facet keys to a volatile set
        Set<String> facetKeys = new HashSet<>(facetMap.keySet());

        // Build a map of parent - child node IDs. This should contain the parents
        // of all our starting facet terms.
        Map<String, Set<String>> nodeChildren = findParentEntries(searcher, facetKeys);

        // Find the top nodes
        Set<String> topNodes = findTopLevelNodes(nodeChildren);
        LOGGER.debug("Found {} top level nodes", topNodes.size());

        // Convert to a list of TreeFacetFields
        return topNodes.parallelStream().map(node -> buildAccumulatedEntryTree(0, node, nodeChildren, facetMap))
                .collect(Collectors.toList());
    }

    /**
     * Find all parent nodes for the given set of items.
     * @param searcher the searcher for the collection being used.
     * @param facetValues the starting set of node IDs.
     * @return a map of nodes, keyed by their IDs.
     * @throws IOException
     */
    private Map<String, Set<String>> findParentEntries(SolrIndexSearcher searcher, Collection<String> facetValues)
            throws IOException {
        Map<String, Set<String>> nodeParentIds = new HashMap<>();

        Set<String> nodesFound = new HashSet<>();
        Set<String> nodeIds = new HashSet<>(facetValues);

        int count = 0;
        while (nodeIds.size() > 0 && (maxLevels == 0 || maxLevels >= count)) {
            // Find the direct parents for the current node IDs
            Map<String, Set<String>> parents = findParentIdsForNodes(searcher, nodeIds);
            nodeParentIds.putAll(parents);
            nodesFound.addAll(nodeIds);

            // Get the parent IDs from all the retrieved nodes - these are the next set of
            // nodes whose parents should be found.
            nodeIds = parents.values().stream().flatMap(v -> v.stream()).filter(id -> !nodesFound.contains(id))
                    .collect(Collectors.toSet());

            count++;
        }
        ;

        // Now, invert the map, so it's a map of parent->child IDs
        Map<String, Set<String>> parentChildIds = new HashMap<>();
        for (Entry<String, Set<String>> entry : nodeParentIds.entrySet()) {
            for (String parentId : entry.getValue()) {
                if (!parentChildIds.containsKey(parentId)) {
                    parentChildIds.put(parentId, new HashSet<String>());
                }
                parentChildIds.get(parentId).add(entry.getKey());
            }
        }

        return parentChildIds;
    }

    private Map<String, Set<String>> findParentIdsForNodes(SolrIndexSearcher searcher, Collection<String> nodeIds)
            throws IOException {
        Map<String, Set<String>> parentIds = new HashMap<>();

        LOGGER.debug("Looking up parents for {} nodes", nodeIds.size());
        Query filter = buildFilterQuery(getNodeField(), nodeIds);
        LOGGER.trace("Filter query: {}", filter);

        DocSet docs = searcher.getDocSet(filter);

        for (DocIterator it = docs.iterator(); it.hasNext();) {
            Document doc = searcher.doc(it.nextDoc(), docFields);
            String nodeId = doc.get(getNodeField());

            Set<String> parentIdValues = new HashSet<>(Arrays.asList(doc.getValues(parentField)));
            parentIds.put(nodeId, parentIdValues);

            // Record the label, if required
            if (isLabelRequired(nodeId)) {
                recordLabel(nodeId, doc.getValues(getLabelField()));
            }
        }

        return parentIds;
    }

    /**
     * Build a filter query for a field using a set of values, taken from the keys
     * of a {@link NamedList}.
     * @param field
     * @param values
     * @return a filter string.
     */
    private Query buildFilterQuery(String field, Collection<String> values) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder().setDisableCoord(true);

        values.stream().map(v -> new TermQuery(new Term(field, v))).forEach(tq -> builder.add(tq, Occur.SHOULD));

        return builder.build();
    }

    /**
     * Recursively build an accumulated facet entry tree.
     * @param level current level in the tree (used for debugging/logging).
     * @param fieldValue the current node value.
     * @param hierarchyMap the map of nodes (either in the original facet set,
     * or parents of those entries).
     * @param facetCounts the facet counts, keyed by node ID.
     * @return a {@link TreeFacetField} containing details for the current node and all
     * sub-nodes down to the lowest leaf which has a facet count.
     */
    private TreeFacetField buildAccumulatedEntryTree(int level, String fieldValue,
            Map<String, Set<String>> hierarchyMap, Map<String, Integer> facetCounts) {
        // Build the child hierarchy for this entry.
        // We use a reverse-ordered SortedSet so entries are returned in descending
        // order by their total count.
        SortedSet<TreeFacetField> childHierarchy = new TreeSet<>(Collections.reverseOrder());

        // childTotal is the total number of facet hits below this node
        long childTotal = 0;
        if (hierarchyMap.containsKey(fieldValue)) {
            // Loop through all the direct child URIs, looking for those which are in the annotation map
            for (String childId : hierarchyMap.get(fieldValue)) {
                if (!childId.equals(fieldValue)) {
                    // Found a child of this node - recurse to build its facet tree
                    LOGGER.trace("[{}] Building child tree for {}, with {} children", level, childId,
                            (hierarchyMap.containsKey(childId) ? hierarchyMap.get(childId).size() : 0));
                    TreeFacetField childTree = buildAccumulatedEntryTree(level + 1, childId, hierarchyMap,
                            facetCounts);

                    // Only add to the total count if this node isn't already in the child hierarchy
                    if (childHierarchy.add(childTree)) {
                        childTotal += childTree.getTotal();
                    }
                    LOGGER.trace("[{}] child tree total: {} - child Total {}, child count {}", level,
                            childTree.getTotal(), childTotal, childHierarchy.size());
                } else {
                    LOGGER.trace("[{}] found self-referring ID {}->{}", level, fieldValue, childId);
                }
            }
        }

        // Build the accumulated facet entry
        LOGGER.trace("[{}] Building facet tree for {}", level, fieldValue);
        return new TreeFacetField(getLabel(fieldValue), fieldValue, getFacetCount(fieldValue, facetCounts),
                childTotal, childHierarchy);
    }

    /**
     * Get the count for the facet with the given key.
     * @param key the key to look up.
     * @param facetCounts the map of facet counts.
     * @return the count, or <code>0</code> if the key does not exist in the map.
     */
    private long getFacetCount(String key, Map<String, Integer> facetCounts) {
        if (facetCounts.containsKey(key)) {
            return facetCounts.get(key);
        }
        return 0;
    }

    @Override
    protected Logger getLogger() {
        return LOGGER;
    }

}