com.sindicetech.siren.solr.facet.SirenFieldFacetExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.sindicetech.siren.solr.facet.SirenFieldFacetExtractor.java

Source

/**
 * Copyright (c) 2014, Sindice Limited. All Rights Reserved.
 *
 * This file is part of the SIREn project.
 *
<<<<<<< HEAD:siren-solr-facet/src/main/java/org/sindice/siren/solr/facet/SirenFieldFacetExtractor.java
 * SIREn is not an open-source software. It is owned by Sindice Limited. SIREn
 * is licensed for evaluation purposes only under the terms and conditions of
 * the Sindice Limited Development License Agreement. Any form of modification
 * or reverse-engineering of SIREn is forbidden. SIREn is distributed without
 * any warranty.
=======
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
>>>>>>> develop:siren-solr-facet/src/main/java/com/sindicetech/siren/solr/facet/SirenFieldFacetExtractor.java
 */
package com.sindicetech.siren.solr.facet;

import com.sindicetech.siren.analysis.NumericAnalyzer;
import com.sindicetech.siren.analysis.NumericAnalyzer.NumericParser;
import com.sindicetech.siren.solr.schema.Datatype;
import com.sindicetech.siren.solr.schema.ExtendedJsonField;
import com.sindicetech.siren.solr.schema.TrieDatatype;
import org.apache.commons.lang.NullArgumentException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonProcessingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
import org.codehaus.jackson.node.ValueNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;

/**
 * <p>Given a json document ({@link SolrInputDocument}), generates a {@link SirenFacetEntry}
 * for each of its leaves.
 *
 * @experimental Can change in the next release.
 */
public class SirenFieldFacetExtractor implements FacetExtractor {
    private final Logger logger = LoggerFactory.getLogger(this.getClass());
    private IndexSchema schema;
    private static final ObjectMapper mapper = new ObjectMapper(); // ObjectMapper is thread-safe

    /**
     * Constructs SirenFieldFacetExtractor without setting the IndexSchema.
     *
     * {@link #setSchema(IndexSchema)} MUST be called before calling
     * {@link #extractFacets(SolrInputDocument)}
     *
     */
    public SirenFieldFacetExtractor() {
    }

    /**
     * Constructs SirenFieldFacetExtractor and sets IndexSchema.
     */
    public SirenFieldFacetExtractor(IndexSchema schema) {
        this.schema = schema;
    }

    public void setSchema(IndexSchema schema) {
        this.schema = schema;
    }

    public IndexSchema getSchema() {
        return schema;
    }

    /**
     * The main entry point of this class. Generates a list of {@link SirenFacetEntry} for
     * the given {@link SolrInputDocument} by performing a DFS through the doc.
     *
     * @param doc The document for which to generate facet entries
     * @throws IllegalStateException if IndexSchema was not set (either in constructor or via {@link #setSchema(IndexSchema)}
     */
    @Override
    public List<SirenFacetEntry> extractFacets(SolrInputDocument doc) throws FacetException {
        if (schema == null) {
            throw new IllegalStateException(
                    "Schema field is null - probably the default constructor was used without calling setSchema() later.");
        }

        List<SirenFacetEntry> facets = new ArrayList<SirenFacetEntry>();

        for (String fieldName : doc.getFieldNames()) {
            FieldType ft = schema.getFieldOrNull(fieldName).getType();
            if (ft instanceof ExtendedJsonField) {
                String sirenField = (String) doc.getFieldValue(fieldName);
                try {
                    JsonNode sirenNode = mapper.readTree(sirenField);

                    generateFacetsForLeaves(sirenNode, fieldName, (ExtendedJsonField) ft, "", facets);

                } catch (JsonProcessingException e) {
                    throw new FacetException("Could not parse siren field " + fieldName + ": " + e.getMessage(), e);
                } catch (IOException e) {
                    throw new FacetException(
                            "I/O problem while parsing siren field " + fieldName + ": " + e.getMessage(), e);
                }
            }
        }

        return facets;
    }

    /**
     * The entry point of the generateFacetsForLeaves() methods.
     *
     * DFS through the sirenNode JsonNode. Generates a {@link SirenFacetEntry} for each
     * leaf.
     *
     * @param sirenNode The Json to walk through.
     * @param fieldName The name of the ExtendedJsonField of the original SolrDocument the value of which is sirenNode.
     * @param path The path currently visited by the DFS algorithms. Should be an empty String "" in the initial call.
     * @param facets The entries generated for the leaves. Should be an not null list.
     *
     * @throws NullArgumentException if path or facets are null.
     */
    protected void generateFacetsForLeaves(JsonNode sirenNode, String fieldName, ExtendedJsonField field,
            String path, List<SirenFacetEntry> facets) {
        if (facets == null) {
            throw new NullArgumentException("Parameter facets must not be null");
        }
        if (path == null) {
            throw new NullArgumentException("Parameter path must not be null");
        }

        if (sirenNode.isValueNode()) {
            generateFacetsForLeaves((ValueNode) sirenNode, fieldName, field, path, facets);
        }

        if (sirenNode.isArray()) {
            generateFacetsForLeaves((ArrayNode) sirenNode, fieldName, field, path, facets);
        }

        if (sirenNode.isObject()) {
            generateFacetsForLeaves((ObjectNode) sirenNode, fieldName, field, path, facets);
        }
    }

    /**
     * The generateFacetsForLeaves() method for processing ValueNode, that is leaves.
     * Ends recursion and generates a new {@link SirenFacetEntry}.
     */
    private void generateFacetsForLeaves(ValueNode value, String fieldName, ExtendedJsonField field, String path,
            List<SirenFacetEntry> facets) {
        SirenFacetEntry entry = new SirenFacetEntry();
        entry.fieldName = fieldName;
        entry.path = path;

        if (value.isNull()) {
            entry.value = value.asText();
            entry.datatype = FacetDatatype.NULL;
        } else if (value.isInt()) {
            entry.value = value.asInt();
            entry.datatype = FacetDatatype.LONG;
        } else if (value.isLong()) {
            entry.value = value.asLong();
            entry.datatype = FacetDatatype.LONG;
        } else if (value.isDouble()) {
            entry.value = value.asDouble();
            entry.datatype = FacetDatatype.DOUBLE;
        } else if (value.isBoolean()) {
            entry.value = value.asBoolean();
            entry.datatype = FacetDatatype.BOOLEAN;
        } else {
            entry.value = value.asText();
            entry.datatype = FacetDatatype.STRING;
        }

        facets.add(entry);
    }

    /**
     * The generateFacetsForLeaves() method for processing json arrays, simply delegates for each array element.
     */
    private void generateFacetsForLeaves(ArrayNode array, String fieldName, ExtendedJsonField field, String path,
            List<SirenFacetEntry> facets) {
        Iterator<JsonNode> iterator = array.getElements();

        while (iterator.hasNext()) {
            JsonNode node = iterator.next();

            generateFacetsForLeaves(node, fieldName, field, path, facets);
        }
    }

    /**
     * The generateFacetsForLeaves() method for processing json objects, delegates for each field and constructs path.
     */
    private void generateFacetsForLeaves(ObjectNode object, String fieldName, ExtendedJsonField sirenField,
            String path, List<SirenFacetEntry> facets) {
        Iterator<Entry<String, JsonNode>> iterator = object.getFields();

        while (iterator.hasNext()) {
            Entry<String, JsonNode> entry = iterator.next();
            String field = entry.getKey();
            JsonNode value = entry.getValue();

            if (field.equals("_datatype_") || field.equals("_value_")) {
                generateFacetsForCustomDatatypeLeaf(object, fieldName, sirenField, path, facets);
                return;
            }

            generateFacetsForLeaves(value, fieldName, sirenField, path.isEmpty() ? field : path + "." + field,
                    facets);
        }
    }

    private void generateFacetsForCustomDatatypeLeaf(ObjectNode object, String fieldName,
            ExtendedJsonField sirenField, String path, List<SirenFacetEntry> facets) {
        Iterator<Entry<String, JsonNode>> iterator = object.getFields();

        String datatype = null;
        String value = null;

        while (iterator.hasNext()) {
            Entry<String, JsonNode> entry = iterator.next();
            if ("_datatype_".equals(entry.getKey())) {
                datatype = entry.getValue().asText();
            } else if ("_value_".equals(entry.getKey())) {
                value = entry.getValue().asText();
            } else {
                logger.warn("Unexpected field {} in custom datatype object: {}", entry.getKey(), object.asText());
                continue;
            }
        }

        if (datatype == null || value == null) {
            logger.warn(
                    "Unexpected form of custom datatype object: {}. Not generating facets for this nested object.",
                    object.asText());
            return;
        }

        SirenFacetEntry entry = new SirenFacetEntry();
        entry.fieldName = fieldName;
        entry.path = path;

        Datatype customDatatype = sirenField.getDatatypes().get(datatype);
        if (customDatatype instanceof TrieDatatype) {
            NumericAnalyzer analyzer = (NumericAnalyzer) customDatatype.getAnalyzer();
            NumericParser parser = analyzer.getNumericParser();
            try {
                Number number = parser.parse(new StringReader(value));
                if ((number instanceof Float) || (number instanceof Double)) {
                    entry.datatype = FacetDatatype.DOUBLE;
                    entry.value = number.doubleValue();
                } else if ((number instanceof Integer) || (number instanceof Long)) {
                    entry.datatype = FacetDatatype.LONG;
                    entry.value = number.longValue();
                } else {
                    logger.warn(
                            "Unknown number type {} in custom datatype in nested object {}. Not creating facet field.",
                            number.getClass().getCanonicalName(), object.asText());
                    return;
                }
            } catch (IOException e) {
                logger.warn("Problem parsing custom datatype {} in nested object {}: " + e.getMessage(), datatype,
                        object.asText());
                return;
            }
        } else {
            entry.datatype = FacetDatatype.STRING;
            entry.value = value;
        }

        facets.add(entry);
    }
}