org.apache.solr.handler.AnalysisRequestHandler.java Source code

Introduction

Here is the source code for org.apache.solr.handler.AnalysisRequestHandler.java
Source

package org.apache.solr.handler;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeReflector;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 *
 * @deprecated Use {@link org.apache.solr.handler.DocumentAnalysisRequestHandler} instead.
 **/
@Deprecated
public class AnalysisRequestHandler extends RequestHandlerBase {

    public static Logger log = LoggerFactory.getLogger(AnalysisRequestHandler.class);

    private XMLInputFactory inputFactory;

    @Override
    public void init(NamedList args) {
        super.init(args);

        inputFactory = XMLInputFactory.newInstance();
        try {
            // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
            // XMLInputFactory, as that implementation tries to cache and reuse the
            // XMLStreamReader.  Setting the parser-specific "reuse-instance" property to false
            // prevents this.
            // All other known open-source stax parsers (and the bea ref impl)
            // have thread-safe factories.
            inputFactory.setProperty("reuse-instance", Boolean.FALSE);
        } catch (IllegalArgumentException ex) {
            // Other implementations will likely throw this exception since "reuse-instance"
            // isimplementation specific.
            log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory);
        }
    }

    @Override
    public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        SolrParams params = req.getParams();
        Iterable<ContentStream> streams = req.getContentStreams();
        if (streams != null) {
            for (ContentStream stream : req.getContentStreams()) {
                InputStream is = null;
                XMLStreamReader parser = null;
                try {
                    is = stream.getStream();
                    final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
                    parser = (charset == null) ? inputFactory.createXMLStreamReader(is)
                            : inputFactory.createXMLStreamReader(is, charset);

                    NamedList<Object> result = processContent(parser, req.getSchema());
                    rsp.add("response", result);
                } finally {
                    if (parser != null)
                        parser.close();
                    IOUtils.closeQuietly(is);
                }
            }
        }
    }

    NamedList<Object> processContent(XMLStreamReader parser, IndexSchema schema)
            throws XMLStreamException, IOException {
        NamedList<Object> result = new SimpleOrderedMap<Object>();
        while (true) {
            int event = parser.next();
            switch (event) {
            case XMLStreamConstants.END_DOCUMENT: {
                parser.close();
                return result;
            }
            case XMLStreamConstants.START_ELEMENT: {
                String currTag = parser.getLocalName();
                if ("doc".equals(currTag)) {
                    log.trace("Tokenizing doc...");

                    SolrInputDocument doc = readDoc(parser);
                    SchemaField uniq = schema.getUniqueKeyField();
                    NamedList<NamedList<NamedList<Object>>> theTokens = new SimpleOrderedMap<NamedList<NamedList<Object>>>();
                    result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens);
                    for (String name : doc.getFieldNames()) {
                        FieldType ft = schema.getFieldType(name);
                        Analyzer analyzer = ft.getAnalyzer();
                        Collection<Object> vals = doc.getFieldValues(name);
                        for (Object val : vals) {
                            Reader reader = new StringReader(val.toString());
                            TokenStream tstream = analyzer.tokenStream(name, reader);
                            NamedList<NamedList<Object>> tokens = getTokens(tstream);
                            theTokens.add(name, tokens);
                        }
                    }
                }
                break;
            }
            }
        }
    }

    // a static mapping of the reflected attribute keys to the names used in Solr 1.3/1.4
    static Map<String, String> ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap<String, String>() {
        {
            put(CharTermAttribute.class.getName() + "#term", "value");
            put(OffsetAttribute.class.getName() + "#startOffset", "start");
            put(OffsetAttribute.class.getName() + "#endOffset", "end");
            put(PositionIncrementAttribute.class.getName() + "#positionIncrement", "posInc");
            put(TypeAttribute.class.getName() + "#type", "type");
        }
    });

    static NamedList<NamedList<Object>> getTokens(TokenStream tstream) throws IOException {
        // outer is namedList since order of tokens is important
        NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();

        while (tstream.incrementToken()) {
            final NamedList<Object> token = new SimpleOrderedMap<Object>();
            tokens.add("token", token);
            tstream.reflectWith(new AttributeReflector() {
                public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                    String k = attClass.getName() + '#' + key;
                    // map keys for "standard attributes":
                    if (ATTRIBUTE_MAPPING.containsKey(k)) {
                        k = ATTRIBUTE_MAPPING.get(k);
                    }
                    token.add(k, value);
                }
            });
        }
        return tokens;
    }

    SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException {
        SolrInputDocument doc = new SolrInputDocument();

        StringBuilder text = new StringBuilder();
        String name = null;
        String attrName = "";
        float boost = 1.0f;
        boolean isNull = false;
        while (true) {
            int event = parser.next();
            switch (event) {
            // Add everything to the text
            case XMLStreamConstants.SPACE:
            case XMLStreamConstants.CDATA:
            case XMLStreamConstants.CHARACTERS:
                text.append(parser.getText());
                break;

            case XMLStreamConstants.END_ELEMENT:
                if ("doc".equals(parser.getLocalName())) {
                    return doc;
                } else if ("field".equals(parser.getLocalName())) {
                    if (!isNull) {
                        doc.addField(name, text.toString(), boost);
                        boost = 1.0f;
                    }
                }
                break;

            case XMLStreamConstants.START_ELEMENT:
                text.setLength(0);
                String localName = parser.getLocalName();
                if (!"field".equals(localName)) {
                    log.warn("unexpected XML tag doc/" + localName);
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "unexpected XML tag doc/" + localName);
                }

                String attrVal = "";
                for (int i = 0; i < parser.getAttributeCount(); i++) {
                    attrName = parser.getAttributeLocalName(i);
                    attrVal = parser.getAttributeValue(i);
                    if ("name".equals(attrName)) {
                        name = attrVal;
                    }
                }
                break;
            }
        }
    }

    //////////////////////// SolrInfoMBeans methods //////////////////////
    @Override
    public String getDescription() {
        return "Provide Analysis of text";
    }

    @Override
    public String getVersion() {
        return "$Revision:$";
    }

    @Override
    public String getSourceId() {
        return "$Id:$";
    }

    @Override
    public String getSource() {
        return "$URL:$";
    }

}