eu.scape_project.planning.evaluation.evaluators.XmlExtractor.java Source code

Introduction

Here is the source code for eu.scape_project.planning.evaluation.evaluators.XmlExtractor.java
Source

/*******************************************************************************
 * Copyright 2006 - 2012 Vienna University of Technology,
 * Department of Software Technology and Interactive Systems, IFS
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package eu.scape_project.planning.evaluation.evaluators;

import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;

import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import eu.scape_project.planning.model.scales.Scale;
import eu.scape_project.planning.model.values.Value;

/**
 * This is a generic helper class that takes an XPath expression and uses it to
 * search a specified XML and extract a @link {@link Value}
 * 
 * @author cb
 * 
 */
public class XmlExtractor implements Serializable {
    private static final long serialVersionUID = -8844757294292266759L;

    private static Logger log = LoggerFactory.getLogger(XmlExtractor.class);

    private NamespaceContext namespaceContext;

    public NamespaceContext getNamespaceContext() {
        return namespaceContext;
    }

    public void setNamespaceContext(NamespaceContext context) {
        this.namespaceContext = context;
    }

    public Value extractValue(Document xml, Scale scale, String xpath, String commentXPath) {
        try {
            Document pcdlDoc = xml;
            String text = extractTextInternal(pcdlDoc, xpath);
            Value v = null;
            // if the query provides no result, we cannot extract a value
            if (!StringUtils.isEmpty(text)) {
                v = scale.createValue();
                v.parse(text);
                if (commentXPath != null) {
                    String comment = extractTextInternal(pcdlDoc, commentXPath);
                    v.setComment(comment);
                }
            }
            return v;

        } catch (Exception e) {
            log.error("Could not parse XML " + " searching for path " + xpath + ": " + e.getMessage(), e);
            return null;
        }
    }

    public String extractText(Document xml, String xpath) {
        try {
            Document pcdlDoc = xml;
            String text = extractTextInternal(pcdlDoc, xpath);
            return text;
        } catch (Exception e) {
            log.error("Could not parse XML " + " searching for path " + xpath + ": " + e.getMessage(), e);
            return null;
        }
    }

    public Document getDocument(InputSource xml) throws ParserConfigurationException, SAXException, IOException {
        // extract value via XPath
        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        domFactory.setNamespaceAware(true); // never forget this!
        DocumentBuilder builder = domFactory.newDocumentBuilder();
        Document pcdlDoc = builder.parse(xml);
        return pcdlDoc;
    }

    public Value extractAttributeValue() {
        return null;
    }

    /**
     * very useful: {@link http
     * ://www.ibm.com/developerworks/library/x-javaxpathapi.html}
     * 
     * @param doc
     * @param path
     * @param scale
     * @return
     * @throws ParserConfigurationException
     * @throws SAXException
     * @throws IOException
     * @throws XPathExpressionException
     */
    private String extractTextInternal(Document doc, String path)
            throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {

        XPathFactory factory = XPathFactory.newInstance();

        XPath xpath = factory.newXPath();
        xpath.setNamespaceContext(namespaceContext);
        XPathExpression expr = xpath.compile(path);
        try {
            String result = (String) expr.evaluate(doc, XPathConstants.STRING);
            return result;
        } catch (Exception e) {
            log.error("XML extraction for path " + path + " failed: " + e.getMessage(), e);
            return "XML extraction for path " + path + " failed: " + e.getMessage();
        }
    }

    public HashMap<String, String> extractValues(Document xml, String path) {
        try {
            HashMap<String, String> resultMap = new HashMap<String, String>();

            XPathFactory factory = XPathFactory.newInstance();

            XPath xpath = factory.newXPath();
            xpath.setNamespaceContext(namespaceContext);
            XPathExpression expr = xpath.compile(path);

            NodeList list = (NodeList) expr.evaluate(xml, XPathConstants.NODESET);
            if (list != null) {
                for (int i = 0; i < list.getLength(); i++) {
                    Node n = list.item(i);
                    String content = n.getTextContent();
                    if (content != null) {
                        resultMap.put(n.getLocalName(), content);
                    }
                }
            }
            return resultMap;
        } catch (Exception e) {
            log.error("Could not parse XML " + " searching for path " + path + ": " + e.getMessage(), e);
            return null;
        }
    }

}