net.di2e.ecdr.libs.result.relevance.TextParser.java Source code

Java tutorial

Introduction

Here is the source code for net.di2e.ecdr.libs.result.relevance.TextParser.java

Source

/**
 * Copyright (c) Cohesive Integrations, LLC
 * Copyright (c) Codice Foundation
 *
 * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 3 of the License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details. A copy of the GNU Lesser General Public License is distributed along with this program and can be found at
 * <http://www.gnu.org/licenses/lgpl.html>.
 *
 **/
package net.di2e.ecdr.libs.result.relevance;

import org.apache.commons.lang.StringUtils;
import org.codehaus.stax2.XMLInputFactory2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.StringReader;

/**
 * Static helper that flattens an XML document into a plain string made of its character data,
 * CDATA sections and attribute values.
 *
 * <p>The class cannot be instantiated; all functionality is exposed through static methods.
 */
public final class TextParser {

    private static final Logger LOGGER = LoggerFactory.getLogger(TextParser.class);

    /** Factory used to create a stream reader for every parse; configured once at class load. */
    private static final XMLInputFactory XML_INPUT_FACTORY = createInputFactory();

    private TextParser() {
        // utility class, not meant to be instantiated
    }

    /**
     * Creates the input factory with entity replacement, external entities, coalescing and
     * validation all switched off.
     *
     * <p>While the factory is being looked up, the thread context class loader is temporarily
     * replaced by the class loader of this class and restored afterwards, whatever happens.
     * NOTE(review): presumably this is needed so the StAX implementation visible to this
     * class is the one that gets discovered (e.g. in a modular container) - confirm.
     *
     * @return the configured factory
     */
    private static XMLInputFactory createInputFactory() {
        Thread currentThread = Thread.currentThread();
        ClassLoader tccl = currentThread.getContextClassLoader();
        try {
            currentThread.setContextClassLoader(TextParser.class.getClassLoader());

            XMLInputFactory factory = XMLInputFactory2.newInstance();
            factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE);
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE);
            factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE);
            return factory;
        } finally {
            currentThread.setContextClassLoader(tccl);
        }
    }

    /**
     * Given xml as a string, this method will parse out element text, CDATA text and attribute
     * values. Every non-blank value is trimmed and appended preceded by one space character, so
     * a non-empty result always starts with a space.
     *
     * <p>If the XML cannot be parsed, a warning is logged and whatever text was collected before
     * the failure is returned. A {@code null} or blank input yields an empty string.
     *
     * <p>NOTE(review): coalescing is disabled on the factory, so the parser may report a single
     * run of text as several events; each of them is appended as its own space-separated token.
     *
     * @param xmlData
     *            XML as a {@code String}, may be {@code null}
     * @return parsed CDATA, element text and attribute values, never {@code null}
     */
    protected static String parseTextFrom(String xmlData) {

        StringBuilder builder = new StringBuilder();

        if (StringUtils.isBlank(xmlData)) {
            // nothing to parse; avoids a NullPointerException and a pointless parser failure
            return builder.toString();
        }

        XMLStreamReader xmlStreamReader = null;

        try {
            // xml parser does not handle leading whitespace (an XML declaration must be the very
            // first thing in the document), so strip the surrounding whitespace before parsing
            xmlStreamReader = XML_INPUT_FACTORY.createXMLStreamReader(new StringReader(xmlData.trim()));

            while (xmlStreamReader.hasNext()) {
                switch (xmlStreamReader.next()) {
                case XMLStreamConstants.CHARACTERS:
                case XMLStreamConstants.CDATA:
                    appendIfNotBlank(builder, xmlStreamReader.getText());
                    break;
                case XMLStreamConstants.START_ELEMENT:
                    int attributeCount = xmlStreamReader.getAttributeCount();
                    for (int i = 0; i < attributeCount; i++) {
                        appendIfNotBlank(builder, xmlStreamReader.getAttributeValue(i));
                    }
                    break;
                default:
                    // every other event type carries no text of interest
                    break;
                }
            }
        } catch (XMLStreamException e1) {
            LOGGER.warn("Failure occurred in parsing the xml data (" + xmlData
                    + "). No data has been stored or indexed.", e1);
        } finally {
            closeQuietly(xmlStreamReader);
        }

        return builder.toString();
    }

    /**
     * Appends a single space followed by the trimmed text, unless the text is null, empty or
     * whitespace only.
     */
    private static void appendIfNotBlank(StringBuilder builder, String text) {
        if (StringUtils.isNotBlank(text)) {
            builder.append(' ').append(text.trim());
        }
    }

    /**
     * Releases the resources held by the reader; a failure to do so is only logged because the
     * extracted text is already complete at that point.
     */
    private static void closeQuietly(XMLStreamReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (XMLStreamException e) {
            LOGGER.debug("Unable to close the xml stream reader.", e);
        }
    }

}