com.fluidops.iwb.provider.XMLProvider.java Source code

Java tutorial

Introduction

Here is the source code for com.fluidops.iwb.provider.XMLProvider.java

Source

/*
 * Copyright (C) 2008-2013, fluid Operations AG
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
    
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
    
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

package com.fluidops.iwb.provider;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import org.xml.sax.SAXException;

import com.fluidops.api.security.SHA512;
import com.fluidops.iwb.api.EndpointImpl;
import com.fluidops.iwb.datasource.TreeDataSource;
import com.fluidops.iwb.model.ParameterConfigDoc;
import com.fluidops.iwb.model.TypeConfigDoc;
import com.fluidops.iwb.model.ParameterConfigDoc.Type;
import com.fluidops.iwb.util.IWBFileUtil;
import com.fluidops.iwb.util.User;
import com.fluidops.util.Base64;
import com.fluidops.util.Pair;
import com.fluidops.util.StringUtil;
import com.fluidops.util.XML;
import com.sun.org.apache.xpath.internal.jaxp.XPathFactoryImpl;

/**
 * XML Provider based on internal mapping language for transforming elements
 * from the XML document into RDF. Requires a mapping file and valid XML as input.
 * 
 * Example Specification XML file:
 * <code>
   <mappingSpec>
      <rule id="person" nodeBase="//fops:person" owlType="Person" objectId="person-{./@email}" objectLabel="{./@firstName} {./@lastName}">
         <datatypePropertyMapping value="Year {./@yob}" owlProperty="yearOfBirth" />
         <objectPropertyMapping nodeBase="." value="{./@worksFor}" owlProperty="company" referredRule="company" />
      </rule>
 *       <rule id="company" nodeBase="//fops:company" owlType="Organization" objectId="{./@id}" objectLabel="{./@name}" />
      <rule id="nodeWithoutId" nodeBase="/fops:doc/fops:nodeWithoutId" owlType="NodeWithoutId" />
   </mappingSpec>
   </code>
 * 
 * @author msc
 */
@TypeConfigDoc("XML Provider transforms an XML document into RDF, using a mapping language.  This provider requires a mapping file and valid XML as input.")
public class XMLProvider extends AbstractFlexProvider<XMLProvider.Config> {
    /** Serialization version identifier of this class. */
    private static final long serialVersionUID = 7415666290518242634L;

    /** Logger of this provider. */
    private static final Logger logger = Logger.getLogger(XMLProvider.class.getName());

    /** Factory used to create all statements, URIs and literals emitted by this provider. */
    private static final ValueFactory vf = ValueFactoryImpl.getInstance();

    /**
     * XPath factory used for every XPath evaluation. The field is static, i.e. shared
     * by all instances; it is null until {@link #initializeXPathFactory()} assigns it.
     */
    private static XPathFactory xpf = null;

    /**
     * Matches one brace-delimited placeholder of a parametrized expression;
     * group 1 is the text between the braces.
     */
    private static Pattern PARAMETRIZED_EXPRESSION_PATTERN = Pattern.compile("\\{([^\\}]*)\\}");

    /** Resolver for property URIs; (re-)created in {@link #initializeGather()}, not serialized. */
    private transient ProviderURIResolver uriResolver;

    /** Namespace context for XPath evaluation; set in {@link #initializeGather()}, not serialized. */
    private transient NamespaceContext ctx;

    /**
     * Transforms the input document into RDF statements by applying every rule
     * of the mapping specification to it.
     * <p>
     * If a data source is configured the document is taken from it; otherwise
     * (legacy configuration) it is read via {@link #getInputStream()} and
     * parsed with {@link #getDocument(InputStream)}.
     *
     * @param res list the generated statements are appended to
     * @throws IllegalStateException if no mapping file is configured
     * @throws Exception if the mapping file or the input document cannot be
     *          loaded, or a rule cannot be evaluated
     */
    @Override
    public void gather(final List<Statement> res) throws Exception {
        HashMap<String, MappingRule> mappingRules = initializeGather();

        // initializeGather() returns null when config.mappingfile is not set;
        // fail with a clear message instead of a NullPointerException below
        if (mappingRules == null)
            throw new IllegalStateException("No mapping file configured for XML provider.");

        Document doc = null;
        if (config.dataSource != null) {

            TreeDataSource ds = config.lookupAndRefreshDataSource(TreeDataSource.class);
            doc = ds.getDocument();

        } else {
            // legacy support: load the XML into a DOM from config.xmlfile
            InputStream in = null;

            try {
                in = getInputStream();
                doc = getDocument(in);
            } finally {
                IOUtils.closeQuietly(in);
            }
        }

        // execute mapping rules in specification one by one
        for (MappingRule mr : mappingRules.values())
            processMappingRule(res, mappingRules, doc, mr);
    }

    /**
     * Opens the input the provider reads its data from, as named by
     * <code>config.xmlfile</code>.
     * <p>
     * A value starting with <code>http://</code> or <code>https://</code> is
     * fetched with an HTTP GET request; if <code>config.user</code> is set, an
     * HTTP Basic <code>Authorization</code> header is sent along. Any other
     * value is opened as a file inside the IWB working directory.
     * Extensions may override this method to construct the stream from other sources.
     *
     * @return the opened stream; closing it is up to the caller
     * @throws IOException if the connection or the file cannot be opened
     */
    protected InputStream getInputStream() throws IOException {
        boolean remote = config.xmlfile.startsWith("http://") || config.xmlfile.startsWith("https://");

        // local file, resolved against the working directory
        if (!remote)
            return new FileInputStream(IWBFileUtil.getFileInWorkingDir(config.xmlfile));

        HttpURLConnection connection = (HttpURLConnection) new URL(config.xmlfile).openConnection();

        // credentials configured: authenticate via BasicAuth
        if (config.user != null) {
            String credentials = config.user.username + ":" + config.user.password(this);
            connection.setRequestProperty("Authorization", "Basic " + Base64.encode(credentials));
        }

        connection.setRequestMethod("GET");
        return connection.getInputStream();
    }

    /**
     * Builds a w3c DOM from the given stream.
     * <p>
     * The input is always run through JTidy. If <code>config.xmlfile</code>
     * ends (case-insensitively) with <code>.htm</code> or <code>html</code>,
     * the DOM produced by JTidy is returned directly. Otherwise the input is
     * treated as XML: JTidy writes its cleaned-up output to a buffer, which is
     * then parsed again with a namespace-aware, non-validating
     * {@link DocumentBuilder}.
     *
     * @param in any input stream that contains (x)html/xml data
     * @return the DOM constructed from the stream
     * @throws SAXException if the tidied XML cannot be parsed
     * @throws IOException if reading the data fails
     * @throws ParserConfigurationException if no document builder can be created
     */
    protected Document getDocument(InputStream in) throws SAXException, IOException, ParserConfigurationException {
        // JTidy set-up: quiet clean-up run producing UTF-8 output
        Tidy tidy = new Tidy();
        tidy.setHideComments(true);
        tidy.setTidyMark(false);
        tidy.setQuiet(true);
        tidy.setShowErrors(1);
        tidy.setShowWarnings(false);
        if (StringUtil.isNotNullNorEmpty(config.inputEncoding)) {
            tidy.setInputEncoding(config.inputEncoding);
        }
        tidy.setOutputEncoding("UTF-8");
        tidy.setMakeClean(true);

        // the builder is created up front, whichever branch is taken below
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setNamespaceAware(true);
        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
        DocumentBuilder db = factory.newDocumentBuilder();

        // only a .htm / html file name makes us treat the input as HTML
        String fileName = config.xmlfile.toLowerCase();
        boolean html = fileName.endsWith(".htm") || fileName.endsWith("html");
        if (html)
            return tidy.parseDOM(in, null);

        // XML input: tell JTidy to read and write XML
        tidy.setXmlOut(true);
        tidy.setXmlTags(true);

        // JTidy's own DOM is discarded; its serialized output is re-parsed
        // in order to gain a clean w3c DOM
        ByteArrayOutputStream tidied = new ByteArrayOutputStream();
        InputStream tidiedIn = null;
        try {
            tidy.parseDOM(in, tidied);
            tidiedIn = new ByteArrayInputStream(tidied.toByteArray());
            return db.parse(tidiedIn);
        } finally {
            IOUtils.closeQuietly(tidied);
            IOUtils.closeQuietly(tidiedIn);
        }
    }

    /**
     * Derives ontology statements from the mapping specification (the input
     * document itself is not read).
     * <p>
     * For every rule, each declared type is emitted as <code>owl:Class</code>
     * with its local name as label, and each mapped property that can be
     * resolved is emitted as datatype or object property. Finally, all
     * properties the URI resolver has resolved are typed and labelled.
     *
     * @param ontology list the ontology statements are appended to
     * @throws IllegalStateException if no mapping file is configured
     * @throws Exception if the mapping file cannot be loaded
     */
    @Override
    public void gatherOntology(final List<Statement> ontology) throws Exception {
        HashMap<String, MappingRule> mappingRules = initializeGather();

        // initializeGather() returns null when config.mappingfile is not set;
        // fail with a clear message instead of a NullPointerException below
        if (mappingRules == null)
            throw new IllegalStateException("No mapping file configured for XML provider.");

        // iterate over the rules and extract ontology one by one
        for (MappingRule mr : mappingRules.values()) {
            // tag name (or "*" for types without tag name) -> type URI
            Map<String, URI> types = new HashMap<String, URI>();

            // Step 1: extract type information, type it as owl:Class with label
            for (String owlType : mr.owlTypes) {
                URI type = null;
                if (owlType.contains("=")) {
                    // "tagName=Type": the type applies to nodes with that tag name only
                    String keyVal[] = owlType.split("=");
                    type = EndpointImpl.api().getNamespaceService().guessURIOrCreateInDefaultNS(keyVal[1]);
                    types.put(keyVal[0], type);
                } else {
                    type = EndpointImpl.api().getNamespaceService().guessURIOrCreateInDefaultNS(owlType);
                    types.put("*", type);
                }
                ontology.add(vf.createStatement(type, RDF.TYPE, OWL.CLASS));
                ontology.add(vf.createStatement(type, RDFS.LABEL, vf.createLiteral(type.getLocalName())));
            }

            // Step 2: extract datatype properties
            for (DatatypePropertyMapping dpMapping : mr.datatypePropMappings) {
                for (URI type : types.values()) {
                    URI predicate = uriResolver.resolveProperty(dpMapping.owlProperty, type, OWL.DATATYPEPROPERTY);

                    // store in ontology
                    if (predicate != null)
                        ontology.add(vf.createStatement(predicate, RDF.TYPE, OWL.DATATYPEPROPERTY));
                }
            }

            // Step 3: extract object properties
            for (ObjectPropertyMapping opMapping : mr.objectPropertyMappings) {
                for (URI type : types.values()) {
                    URI predicate = uriResolver.resolveProperty(opMapping.owlProperty, type, OWL.OBJECTPROPERTY);

                    // store in ontology
                    if (predicate != null)
                        ontology.add(vf.createStatement(predicate, RDF.TYPE, OWL.OBJECTPROPERTY));
                }
            }
        }

        // write type and label for all predicates the resolver has resolved
        for (Pair<URI, URI> resolvedProp : uriResolver.resolvedProperties()) {
            URI prop = resolvedProp.fst;
            URI type = resolvedProp.snd;

            if (prop != null && type != null) {
                ontology.add(vf.createStatement(prop, RDF.TYPE, type));

                Statement stmt = vf.createStatement(prop, RDFS.LABEL, vf.createLiteral(prop.getLocalName()));
                ontology.add(stmt);
            }
        }
    }

    /**
     * Returns the configuration class of this provider.
     *
     * @return {@link Config}, the nested configuration class
     */
    @Override
    public Class<? extends Config> getConfigClass() {
        return Config.class;
    }

    /**
     * Shared initialization code for the gather and gatherOntology methods:
     * sets up the namespace context, the XPath factory and the property URI
     * resolver, then loads the mapping specification.
     * 
     * @return the mapping rules keyed by rule id, or <code>null</code> if no
     *          mapping file is configured
     * @throws Exception if the mapping file cannot be parsed
     */
    public HashMap<String, MappingRule> initializeGather() throws Exception {
        // prefix -> namespace mapping as defined in the config
        ctx = getNamespaceContextFromConfig();
        initializeXPathFactory();
        uriResolver = new ProviderURIResolver(config.globalResolver);

        // the specification is loaded relative to the working directory
        if (config.mappingfile != null)
            return parseMappingFile(IWBFileUtil.getFileInWorkingDir(config.mappingfile));

        return null;
    }

    /**
     * Processes a single mapping rule: for every node selected by the rule's
     * node base, a typed (and, where unique, labelled) resource is emitted
     * together with the statements of the rule's datatype and object property
     * mappings.
     * <p>
     * The resource URI is built from the rule's <code>objectId</code>
     * expression; if that is absent or does not resolve to a single value, a
     * URI derived from the hash of the serialized node is used instead.
     * Property mappings whose predicate cannot be resolved are skipped.
     * 
     * @param stmts list the generated statements are appended to
     * @param mappingRules all rules of the specification, keyed by rule id;
     *          used to look up rules referred to by object property mappings
     * @param doc the context node the rule's node base is evaluated against
     * @param mr the rule to process
     * @throws XPathExpressionException if a node base expression cannot be evaluated
     * @throws RuntimeException if a matched node's name has no type in the
     *          rule and the rule declares no type without tag name
     */
    public void processMappingRule(List<Statement> stmts, HashMap<String, MappingRule> mappingRules, Node doc,
            MappingRule mr) throws XPathExpressionException {
        // select all nodes matching the rule
        XPath nlXpath = xpf.newXPath();
        nlXpath.setNamespaceContext(ctx);
        NodeList nl = (NodeList) nlXpath.evaluate(mr.nodeBase, doc, XPathConstants.NODESET);
        if (nl.getLength() == 0)
            return;

        // The table "tag name (or *) -> type" depends on the rule only, not on
        // the matched node, so it is built once rather than once per node.
        Map<String, URI> types = new HashMap<String, URI>();
        for (String owlType : mr.owlTypes) {
            URI type = null;
            if (owlType.contains("=")) {
                String keyVal[] = owlType.split("=");
                type = EndpointImpl.api().getNamespaceService().guessURIOrCreateInDefaultNS(keyVal[1]);
                types.put(keyVal[0], type);
            } else {
                type = EndpointImpl.api().getNamespaceService().guessURIOrCreateInDefaultNS(owlType);
                types.put("*", type);
            }
        }

        for (int i = 0; i < nl.getLength(); i++) {
            Node n = nl.item(i);

            // get node id, falling back to the hash-based id
            URI uri = null;
            if (mr.objectId != null) {
                uri = resolveParametrizedExpressionAsURI(mr.objectId, n, mr.instanceNamespace, false, null, null);
                if (uri == null)
                    uri = getRandomId(n, mr.instanceNamespace);
            } else {
                uri = getRandomId(n, mr.instanceNamespace);
            }

            if (uri == null) {
                logger.warn(" URI could not be created: " + mr.objectId);
                continue; // cannot be resolved
            }

            // node type: a type bound to the node's tag name wins over "*"
            URI type = null;
            if (types.containsKey(n.getNodeName()))
                type = types.get(n.getNodeName());
            else if (types.containsKey("*"))
                type = types.get("*");
            else
                throw new RuntimeException("Unmatched Tag Name '" + n.getNodeName() + "'.");
            stmts.add(vf.createStatement(uri, RDF.TYPE, type));

            // node label: only written if the expression yields exactly one value
            List<Literal> labelValues = resolveParametrizedExpressionAsLiterals(mr.objectLabel, n, false, null,
                    null);
            if (labelValues.size() == 1)
                stmts.add(vf.createStatement(uri, RDFS.LABEL, labelValues.get(0)));

            // handle datatype properties
            for (DatatypePropertyMapping dpMapping : mr.datatypePropMappings) {
                URI predicate = uriResolver.resolveProperty(dpMapping.owlProperty, type, OWL.DATATYPEPROPERTY);

                // gatherOntology() treats a null predicate as "not resolvable";
                // do the same here rather than creating statements without predicate
                if (predicate == null) {
                    logger.warn("Property could not be resolved, mapping skipped: " + dpMapping.owlProperty);
                    continue;
                }

                // extract and write values for rule
                List<Literal> values = resolveParametrizedExpressionAsLiterals(dpMapping.value, n,
                        dpMapping.useNodeName, dpMapping.ignoreIfMatches, dpMapping.manipulator);
                for (Literal value : values)
                    stmts.add(vf.createStatement(uri, predicate, value));
            }

            // handle object properties
            for (ObjectPropertyMapping opMapping : mr.objectPropertyMappings) {
                String xpathOP = opMapping.value;

                URI predicate = uriResolver.resolveProperty(opMapping.owlProperty, type, OWL.OBJECTPROPERTY);
                if (predicate == null) {
                    logger.warn("Property could not be resolved, mapping skipped: " + opMapping.owlProperty);
                    continue;
                }

                // we use the namespace from the referred rule, to create exactly
                // the same URI as written by the original object
                MappingRule referredRule = mappingRules.get(opMapping.referredRule);
                String namespace = referredRule != null ? referredRule.instanceNamespace
                        : opMapping.instanceNamespace;

                // create object property mapping for each node in the node base
                XPath opmNodeListXpath = xpf.newXPath();
                opmNodeListXpath.setNamespaceContext(ctx);
                NodeList opmNodeList = (NodeList) opmNodeListXpath.evaluate(opMapping.nodeBase, n,
                        XPathConstants.NODESET);
                for (int j = 0; j < opmNodeList.getLength(); j++) {
                    Node opmNode = opmNodeList.item(j);

                    // extract values for rule
                    List<String> values = null;
                    if (opMapping.hashValue)
                        values = getHashValue(xpathOP, opmNode);
                    else
                        values = resolveParametrizedExpression(xpathOP, opmNode, opMapping.useNodeName,
                                opMapping.ignoreIfMatches);

                    // generate the corresponding statements
                    for (String value : values) {
                        URI obj = createUriInNamespace(value, namespace);
                        stmts.add(vf.createStatement(uri, predicate, obj));
                    }
                }
            }
        }
    }

    /** Matches an expression that consists of exactly one placeholder; group 1 is the XPath inside the braces. */
    private static final Pattern HASH_VALUE_EXPRESSION_PATTERN = Pattern.compile("^\\{([^\\}]*)\\}$");

    /**
     * Computes hash values for the nodes selected by an expression. The
     * expression must consist of exactly one placeholder, i.e. have the form
     * <code>{xpath}</code>; any other expression yields an empty list.
     * <p>
     * Every node selected by the XPath is serialized and the SHA-512 value of
     * the serialization is added to the result. If the evaluation fails, the
     * failure is logged and the values collected so far are returned.
     * 
     * @param xpathOP the expression, a single XPath wrapped in braces
     * @param context the node the XPath is evaluated against
     * @return one hash value per selected node, possibly empty, never null
     */
    protected List<String> getHashValue(String xpathOP, Node context) {
        List<String> res = new ArrayList<String>();

        Matcher m = HASH_VALUE_EXPRESSION_PATTERN.matcher(xpathOP);
        if (!m.matches())
            return res;

        try {
            XPath xpath = xpf.newXPath();
            xpath.setNamespaceContext(ctx);
            XPathExpression xpathExp = xpath.compile(m.group(1));
            NodeList nl = (NodeList) xpathExp.evaluate(context, XPathConstants.NODESET);

            for (int i = 0; i < nl.getLength(); i++) {
                String s = XML.toFormattedString(nl.item(i));
                res.add(SHA512.encrypt(s));
            }
        } catch (Exception e) {
            // best effort: keep going, but log the cause with its stack trace
            // (the bare message may be null and hides where the failure occurred)
            logger.warn("Could not compute hash value for expression '" + xpathOP + "': " + e.getMessage(), e);
        }
        return res;
    }

    /**
     * Resolves a parametrized expression against a given context node.
     * A parametrized expression is a string of the form
     * 
     *   "Bla bla {XP1} some text {XP2} ... {XPn}",
     * 
     * where XP1 ... XPn are XPath expressions. When evaluating a parametrized
     * expression, the XPath expressions are evaluated against the context 
     * node and their occurrences are replaced by the values of the result
     * nodes. The result is a list of strings, representing all combinations
     * of solutions.
     * 
     * As an example, assume the parametrized expression is
     * 
     *   "{./name} - {./friend}"
     *  
     * and [[./name]] = { Pete }, [[./friend]] = { Jane, Joe }, then the result
     * of evaluating the parametrized expression is the list { "Pete - Jane", "Pete - Joe" }.
     * 
     * If one of the XPath expressions yields no (non-empty) value, the result
     * is empty. If an expression cannot be compiled or evaluated, the failure
     * is logged and an empty list is returned.
     * 
     * @param parametrizedExpression the expression to resolve; null yields an empty list
     * @param context the node the XPath expressions are evaluated against
     * @param useNodeName if true, the names of the selected nodes are used
     *          instead of their text content / node value
     * @param ignoreIfMatches optional regular expression; results matching it
     *          entirely are dropped (null or empty: no filtering)
     * @return the resolved strings, never null
     */
    protected List<String> resolveParametrizedExpression(String parametrizedExpression, Node context,
            boolean useNodeName, String ignoreIfMatches) {
        List<String> result = new ArrayList<String>();
        if (parametrizedExpression == null)
            return result;

        // first collect the placeholders ("{xpath}", braces included) hidden in the expression
        Map<String, List<String>> xPathExpressions = new HashMap<String, List<String>>();
        Matcher m = PARAMETRIZED_EXPRESSION_PATTERN.matcher(parametrizedExpression);
        while (m.find())
            xPathExpressions.put(m.group(0), new ArrayList<String>());

        // evaluate each placeholder's XPath and collect its values
        XPath xpathDPExp = xpf.newXPath();
        xpathDPExp.setNamespaceContext(ctx);
        for (Entry<String, List<String>> entry : xPathExpressions.entrySet()) {
            String placeholder = entry.getKey();
            try {
                XPathExpression xpathExp = xpathDPExp
                        .compile(placeholder.substring(1, placeholder.length() - 1));

                try {
                    NodeList dpNodeList = (NodeList) xpathExp.evaluate(context, XPathConstants.NODESET);

                    for (int i = 0; i < dpNodeList.getLength(); i++) {
                        Node dpNode = dpNodeList.item(i);
                        String dpNodeVal = null;
                        if (useNodeName)
                            dpNodeVal = dpNode.getNodeName();
                        else if (dpNode instanceof Element)
                            dpNodeVal = dpNode.getTextContent();
                        else
                            dpNodeVal = dpNode.getNodeValue();

                        if (!StringUtil.isNullOrEmpty(dpNodeVal))
                            entry.getValue().add(dpNodeVal);
                    }
                } catch (XPathExpressionException isString) {
                    // the expression does not yield a node set: evaluate it as string
                    String string = (String) xpathExp.evaluate(context, XPathConstants.STRING);
                    if (!StringUtil.isNullOrEmpty(string))
                        entry.getValue().add(string);
                }
            } catch (Exception e) {
                // log the cause with its stack trace; the bare message may be null
                logger.warn("Could not evaluate '" + placeholder + "': " + e.getMessage(), e);
                return result; // error
            }
        }

        // and compute the set of all combinations
        result.add(parametrizedExpression);
        for (Entry<String, List<String>> entry : xPathExpressions.entrySet()) {
            String placeholder = entry.getKey();
            List<String> expanded = new ArrayList<String>();
            for (String value : entry.getValue()) {
                for (String partial : result) {
                    // String.replace substitutes every occurrence in one pass;
                    // repeating it "while the placeholder is contained" would never
                    // terminate for a value that itself contains the placeholder text
                    expanded.add(partial.replace(placeholder, value));
                }
            }
            result = expanded;
        }

        if (StringUtil.isNullOrEmpty(ignoreIfMatches) || result.isEmpty())
            return result;

        // else: we filter the result; the pattern is compiled once for all strings
        Pattern ignore = Pattern.compile(ignoreIfMatches);
        List<String> resultFiltered = new ArrayList<String>();
        for (String s : result) {
            if (!ignore.matcher(s).matches())
                resultFiltered.add(s);
        }
        return resultFiltered;
    }

    /**
     * Resolves a parametrized expression against a given context node and
     * wraps each non-empty result string as a literal. If a manipulator is
     * given, it is applied to every string before the literal is created.
     * 
     * @param parametrizedExpression the expression to resolve
     * @param context the node the expression is evaluated against
     * @param useNodeName passed on to {@link #resolveParametrizedExpression}
     * @param ignoreIfMatches passed on to {@link #resolveParametrizedExpression}
     * @param manipulator specification of the value manipulator; null or empty for none
     * @return the literals, never null
     */
    protected List<Literal> resolveParametrizedExpressionAsLiterals(String parametrizedExpression, Node context,
            boolean useNodeName, String ignoreIfMatches, String manipulator) {
        List<Literal> literals = new ArrayList<Literal>();

        for (String resolved : resolveParametrizedExpression(parametrizedExpression, context, useNodeName,
                ignoreIfMatches)) {
            // empty values never become literals
            if (StringUtil.isNullOrEmpty(resolved))
                continue;

            if (!StringUtil.isNullOrEmpty(manipulator)) {
                ValueManipulator vm = ValueManipulator.initFromString(manipulator);
                literals.add(vf.createLiteral(vm.manipulate(resolved)));
            } else {
                literals.add(vf.createLiteral(resolved));
            }
        }
        return literals;
    }

    /**
     * Resolves a parametrized expression against a given context node and
     * wraps the single result as URI in the given namespace, after applying
     * the value manipulator to it.
     * 
     * @param parametrizedExpression the expression to resolve
     * @param context the node the expression is evaluated against
     * @param namespace namespace the URI is created in, see {@link #createUriInNamespace}
     * @param useNodeName passed on to {@link #resolveParametrizedExpression}
     * @param ignoreIfMatches passed on to {@link #resolveParametrizedExpression}
     * @param manipulator specification handed to <code>ValueManipulator.initFromString</code>
     * @return the URI, or null if the expression does not resolve to exactly
     *          one non-empty value
     */
    protected URI resolveParametrizedExpressionAsURI(String parametrizedExpression, Node context, String namespace,
            boolean useNodeName, String ignoreIfMatches, String manipulator) {
        List<String> candidates = resolveParametrizedExpression(parametrizedExpression, context, useNodeName,
                ignoreIfMatches);

        // only a unique, non-empty result identifies the resource
        boolean unique = candidates.size() == 1 && !StringUtil.isNullOrEmpty(candidates.get(0));
        if (!unique)
            return null;

        String suffix = ValueManipulator.initFromString(manipulator).manipulate(candidates.get(0));
        return createUriInNamespace(suffix, namespace);
    }

    /**
     * Creates a URI from a namespace and a suffix. Characters of the suffix
     * that are not allowed in an IRI reference are replaced by '_'.
     * 
     * @param value the URI suffix
     * @param namespace key looked up among the registered namespace prefixes;
     *          if null or not resolvable (the latter is logged), the URI is
     *          generated in the default namespace
     * @return the URI
     */
    protected URI createUriInNamespace(String value, String namespace) {
        String localName = StringUtil.replaceNonIriRefCharacter(value, '_');

        String targetNamespace = null;
        if (namespace != null)
            targetNamespace = EndpointImpl.api().getNamespaceService().getRegisteredNamespacePrefixes()
                    .get(namespace);

        if (targetNamespace == null) {
            // a namespace was requested but is unknown: report it, then fall back
            if (namespace != null)
                logger.warn("Namespace " + namespace + " cannot be resolved.");
            targetNamespace = EndpointImpl.api().getNamespaceService().defaultNamespace();
        }

        return vf.createURI(targetNamespace, localName);
    }

    /**
     * Parses the mapping file into the internal data structure. Every
     * <code>/mappingSpec/rule</code> element becomes one {@link MappingRule},
     * including its <code>datatypePropertyMapping</code> and
     * <code>objectPropertyMapping</code> children. Property mappings lacking
     * a mandatory attribute are logged and skipped.
     * 
     * @param mappingFile the mapping specification
     * @return the rules keyed by their id attribute
     * @throws IllegalArgumentException if a rule declares no (non-blank) owlType
     * @throws Exception if the file cannot be read or parsed
     */
    protected HashMap<String, MappingRule> parseMappingFile(File mappingFile) throws Exception {
        HashMap<String, MappingRule> mappingRules = new HashMap<String, MappingRule>();

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(false); // should be parametrizable
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(mappingFile);

        XPath topLevelExp = xpf.newXPath();
        NodeList nl = (NodeList) topLevelExp.evaluate("/mappingSpec/rule", doc, XPathConstants.NODESET);
        for (int i = 0; i < nl.getLength(); i++) {
            Node n = nl.item(i);
            NamedNodeMap atts = n.getAttributes();

            MappingRule rule = new MappingRule();
            rule.id = getNodeTextContent(atts, "id");
            rule.nodeBase = getNodeTextContent(atts, "nodeBase");
            rule.instanceNamespace = getNodeTextContent(atts, "instanceNamespace");

            // owlType is a comma-separated list; a missing attribute is treated like
            // an empty list so that it is reported by the check below instead of
            // causing a NullPointerException
            String owlTypes = getNodeTextContent(atts, "owlType");
            String[] splTypes = owlTypes == null ? new String[0] : owlTypes.split(",");
            rule.owlTypes = new ArrayList<String>();
            for (String owlType : splTypes) {
                if (!StringUtil.isNullOrEmpty(owlType) && !StringUtil.isNullOrEmpty(owlType.trim()))
                    rule.owlTypes.add(owlType.trim());
            }
            if (rule.owlTypes.isEmpty())
                throw new IllegalArgumentException("owlType invalid or not specified: " + owlTypes);

            rule.objectId = getNodeTextContent(atts, "objectId");
            rule.objectLabel = getNodeTextContent(atts, "objectLabel");

            // datatype property mappings: value and owlProperty are mandatory
            rule.datatypePropMappings = new ArrayList<DatatypePropertyMapping>();
            XPath dpm = xpf.newXPath();
            NodeList dpml = (NodeList) dpm.evaluate("./datatypePropertyMapping", n, XPathConstants.NODESET);
            for (int j = 0; j < dpml.getLength(); j++) {
                Node dpmNode = dpml.item(j);
                NamedNodeMap dpmNodeAtts = dpmNode.getAttributes();
                String value = getNodeTextContent(dpmNodeAtts, "value");
                String owlProperty = getNodeTextContent(dpmNodeAtts, "owlProperty");
                String useNodeName = getNodeTextContent(dpmNodeAtts, "useNodeName");
                String ignoreIfMatches = getNodeTextContent(dpmNodeAtts, "ignoreIfMatches");
                String manipulator = getNodeTextContent(dpmNodeAtts, "manipulator");

                if (!StringUtil.isNullOrEmpty(value) && !StringUtil.isNullOrEmpty(owlProperty)) {
                    DatatypePropertyMapping mapping = new DatatypePropertyMapping();
                    mapping.value = value;
                    mapping.owlProperty = owlProperty;
                    mapping.useNodeName = "true".equals(useNodeName);
                    mapping.ignoreIfMatches = ignoreIfMatches;
                    mapping.manipulator = manipulator;
                    rule.datatypePropMappings.add(mapping);
                } else {
                    // report the skipped mapping, as is done for object property mappings
                    logger.warn("Illegal rule specification: " + XML.toFormattedString(dpmNode));
                }
            }

            // object property mappings: nodeBase, value and owlProperty are mandatory
            rule.objectPropertyMappings = new ArrayList<ObjectPropertyMapping>();
            XPath opm = xpf.newXPath();
            NodeList opml = (NodeList) opm.evaluate("./objectPropertyMapping", n, XPathConstants.NODESET);
            for (int j = 0; j < opml.getLength(); j++) {
                Node opmNode = opml.item(j);
                NamedNodeMap opmNodeAtts = opmNode.getAttributes();
                String nodeBase = getNodeTextContent(opmNodeAtts, "nodeBase");
                String value = getNodeTextContent(opmNodeAtts, "value");
                String owlProperty = getNodeTextContent(opmNodeAtts, "owlProperty");
                String referredRule = getNodeTextContent(opmNodeAtts, "referredRule");
                String instanceNamespace = getNodeTextContent(opmNodeAtts, "instanceNamespace");
                String hashValue = getNodeTextContent(opmNodeAtts, "hashValue");
                String useNodeName = getNodeTextContent(opmNodeAtts, "useNodeName");
                String ignoreIfMatches = getNodeTextContent(opmNodeAtts, "ignoreIfMatches");

                if (!StringUtil.isNullOrEmpty(nodeBase) && !StringUtil.isNullOrEmpty(value)
                        && !StringUtil.isNullOrEmpty(owlProperty)) {
                    ObjectPropertyMapping mapping = new ObjectPropertyMapping();
                    mapping.nodeBase = nodeBase;
                    mapping.value = value;
                    mapping.owlProperty = owlProperty;
                    mapping.referredRule = referredRule;
                    mapping.instanceNamespace = instanceNamespace;
                    mapping.hashValue = "true".equals(hashValue);
                    mapping.useNodeName = "true".equals(useNodeName);
                    mapping.ignoreIfMatches = ignoreIfMatches;

                    rule.objectPropertyMappings.add(mapping);
                } else {
                    logger.warn("Illegal rule specification: " + XML.toFormattedString(opmNode));
                }
            }

            mappingRules.put(rule.id, rule);
        }

        return mappingRules;
    }

    /**
     * Reads the text content of a named item (e.g. an attribute).
     * 
     * @param nnm the map to look the item up in
     * @param prop name of the item
     * @return the item's text content, or null if the map has no such item
     */
    protected String getNodeTextContent(NamedNodeMap nnm, String prop) {
        Node item = nnm.getNamedItem(prop);
        if (item != null)
            return item.getTextContent();
        return null;
    }

    /**
     * Creates an id for a node that has no usable explicit id. Despite its
     * name the id is not random: it is the SHA-512 value of the node's
     * formatted string serialization, wrapped as URI in the given namespace.
     * 
     * @param n the node to create an id for
     * @param namespace namespace the URI is created in, see {@link #createUriInNamespace}
     * @return the URI, or null if the node cannot be serialized to a string
     * @throws RuntimeException wrapping any failure while hashing or creating the URI
     */
    protected URI getRandomId(Node n, String namespace) {
        String serialized = XML.toFormattedString(n);
        if (serialized == null)
            return null;

        try {
            return createUriInNamespace(SHA512.encrypt(serialized), namespace);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds a namespace context from {@code config.namespaceAbbreviations}.
     * The setting is a comma-separated list of {@code prefix=uri} pairs, e.g.
     * {@code fluidops=http://www.fluidops.com/}. Each pair is split at its
     * first {@code '='} only, so URIs that themselves contain {@code '='}
     * (query strings) are kept; whitespace around prefix and URI is ignored.
     * Entries without any {@code '='} are skipped.
     *
     * @return a namespace context built according to the config; it resolves
     *         prefixes to URIs and supports the reverse lookup from URI to
     *         prefix(es)
     */
    protected NamespaceContext getNamespaceContextFromConfig() {
        final Map<String, String> namespaceMapping = new HashMap<String, String>();
        if (!StringUtil.isNullOrEmpty(config.namespaceAbbreviations)) {
            for (String mapping : config.namespaceAbbreviations.split(",")) {
                // limit 2: split at the first '=' only, the rest belongs to the URI
                String[] inner = mapping.trim().split("=", 2);
                if (inner.length == 2) {
                    namespaceMapping.put(inner[0].trim(), inner[1].trim());
                }
            }
        }

        return new NamespaceContext() {
            /**
             * @return the URI bound to the prefix, or {@code null} if the
             *         prefix is not configured (kept as null so that unbound
             *         prefixes are not mistaken for the empty namespace)
             */
            @Override
            public String getNamespaceURI(String prefix) {
                return namespaceMapping.get(prefix);
            }

            /**
             * @return an iterator over all configured prefixes bound to the
             *         URI; empty (never null) if there is none
             */
            @Override
            public Iterator<String> getPrefixes(String uri) {
                List<String> prefixes = new ArrayList<String>();
                for (Entry<String, String> binding : namespaceMapping.entrySet()) {
                    if (binding.getValue().equals(uri)) {
                        prefixes.add(binding.getKey());
                    }
                }
                return prefixes.iterator();
            }

            /**
             * @return one configured prefix bound to the URI, or {@code null}
             *         if the URI is not bound to any prefix
             */
            @Override
            public String getPrefix(String uri) {
                for (Entry<String, String> binding : namespaceMapping.entrySet()) {
                    if (binding.getValue().equals(uri)) {
                        return binding.getKey();
                    }
                }
                return null;
            }
        };
    }

    /**
     * Initializes the XPathFactory member variable of the class.
     * <p>
     * If {@code config.xpathFactoryClass} is set, that class is loaded by name
     * and instantiated through its no-argument constructor. Any failure
     * (class not found, not instantiable, not an {@link XPathFactory}) is
     * logged as a warning together with its cause and does not abort
     * initialization. If no factory is set afterwards, a factory obtained
     * from {@code XPathFactoryImpl.newInstance()} is used.
     */
    protected void initializeXPathFactory() {
        if (!StringUtil.isNullOrEmpty(config.xpathFactoryClass)) {
            try {
                Class<?> c = Class.forName(config.xpathFactoryClass);
                xpf = (XPathFactory) c.newInstance();
            } catch (Exception e) {
                // pass the exception along so the log shows WHY loading failed
                logger.warn("Could not load XPathFactory implementation '" + config.xpathFactoryClass + "'. "
                        + "Using default instead.", e);
            }
        }

        // in case xpf is not set or something went wrong: create standard XPathFactory
        if (xpf == null)
            xpf = XPathFactoryImpl.newInstance();
    }

    /**
     * Configuration of the XML provider. Each public field is one user-facing
     * parameter; its meaning is given by the {@code desc} text of the attached
     * {@code ParameterConfigDoc} annotation. Only {@link #xmlfile} and
     * {@link #mappingfile} are marked as required.
     */
    public static class Config extends AbstractFlexProvider.DataSourceProviderConfig implements Serializable {
        /**
         * Version identifier for Java serialization of this configuration.
         */
        private static final long serialVersionUID = 4877664239008519735L;

        @ParameterConfigDoc(desc = "XML Input File", required = true)
        public String xmlfile;

        @ParameterConfigDoc(desc = "The location of a mapping file describing how to map the XML file to the RDF data model. The file must be located in a directory where access is granted. Example: config/xml/MappingSpec.xml", required = true, type = Type.FILEEDITOR)
        public String mappingfile;

        @ParameterConfigDoc(desc = "Resolver file defining a translation from properties used in the input and mapping files to actual RDF properties")
        public String globalResolver;

        // Fully qualified class name; it is loaded reflectively via Class.forName
        // in initializeXPathFactory().
        @ParameterConfigDoc(desc = "XPath Factory class (possibly from an external library) that is used for resolving XPath expressions when parsing the input and mapping files")
        public String xpathFactoryClass;

        // Parsed in getNamespaceContextFromConfig() as comma-separated prefix=uri pairs.
        @ParameterConfigDoc(desc = "Comma-separated list of namespaces that can occur in XPath expressions in the mapping file e.g. fluidops=http://www.fluidops.com/, we can now use fluidops: to refer to XML elements in the respective namespace.")
        public String namespaceAbbreviations;

        @ParameterConfigDoc(desc = "Encoding of the input file e.g. UTF-8, iso-8859-1")
        public String inputEncoding;

        // NOTE(review): "BasicAuthentification" in the description below is
        // presumably meant to read "Basic Authentication"; it is a user-facing
        // string, so it is left untouched here.
        @ParameterConfigDoc(desc = "Login credentials for retrieving a file which is protected using BasicAuthentification (if needed)", required = false)
        public User user;
    }

    /**
     * A single mapping rule: selects a set of XML nodes and describes the
     * types, identifier, label and property mappings to apply to them.
     * 
     * @author michaelschmidt
     */
    public static class MappingRule {
        /** 
         * Unique ID for the mapping rule. Rules are stored in a map keyed by
         * this ID, so if the same ID is used more than once, only the last
         * rule with that ID is kept.
         */
        public String id;

        /**
         * XPath expression specifying the node set the rule applies to.
         */
        public String nodeBase;

        /**
         * OWL types that will be assigned to the nodes in the node base.
         */
        public List<String> owlTypes;

        /**
         * Namespace to which instances are written. Must be a valid namespace
         * prefix as registered in the namespace service.
         */
        public String instanceNamespace;

        /**
         * Parametrized XPath expression for generating the object ID
         * relative to the context node; example: {./@lastname}-{./@firstname}
         */
        public String objectId;

        /**
         * Parametrized XPath expression for generating the object label
         * (presumably evaluated relative to the context node, like
         * {@link #objectId} -- TODO confirm);
         * example: {./@lastname}-{./@firstname}
         */
        public String objectLabel;

        /**
         * List of datatype property mappings belonging to this rule.
         */
        public List<DatatypePropertyMapping> datatypePropMappings;

        /**
         * List of object property mappings belonging to this rule.
         */
        public List<ObjectPropertyMapping> objectPropertyMappings;
    }

    /**
     * Describes how a datatype property (a property with a literal value) is
     * extracted from the XML input for the nodes of a mapping rule.
     */
    public static class DatatypePropertyMapping {
        /**
         * Parametrized XPath expression denoting the value.
         */
        public String value;

        /**
         * The OWL property used to refer to the value.
         */
        public String owlProperty;

        /**
         * Name of the manipulation to apply to the extracted value
         * (toLowerCase, toUpperCase, ...); presumably resolved through
         * {@code ValueManipulator.initFromString} -- TODO confirm.
         */
        public String manipulator;

        /**
         * If true, use the node name instead of the node text as the value.
         */
        public boolean useNodeName;

        /**
         * Regular expression; if it matches the value, the
         * property-value pair is ignored.
         */
        public String ignoreIfMatches;
    }

    /**
     * Describes how an object property (a property whose value is another
     * resource) is generated from the XML input.
     */
    public static class ObjectPropertyMapping {
        /**
         * Node base: for these nodes, the object property is generated.
         */
        public String nodeBase;

        /**
         * Parametrized XPath expression denoting the value relative to the base node.
         */
        public String value;

        /**
         * Whether to hash the value. Set when the mapping's
         * {@code hashValue} attribute equals "true".
         */
        public boolean hashValue;

        /**
         * The referred rule (needed because namespace of referred resource is unknown).
         * If, for instance, the object property mapping refers to a Person and there
         * is a rule for generating the person, provide the id of the latter rule here.
         */
        public String referredRule;

        /**
         * The OWL property used to refer to the value.
         */
        public String owlProperty;

        /**
         * As an alternative to the referred rule (i.e., if the object is not
         * created by another rule), instanceNamespace can be used to pass a
         * namespace.
         */
        public String instanceNamespace;

        /**
         * If true, use the node name instead of the node text as the value.
         * Set when the mapping's {@code useNodeName} attribute equals "true".
         */
        public boolean useNodeName;

        /**
         * Regular expression; if it matches the value, the
         * property-value pair is ignored.
         */
        public String ignoreIfMatches;

    }

    /**
     * Value manipulations together with their implementation, such as
     * toLowerCase, toUpperCase, ...
     * <p>
     * Case conversions use {@link java.util.Locale#ROOT} so that the result
     * does not depend on the default locale of the machine running the
     * provider (e.g. under a Turkish default locale "i" would otherwise be
     * upper-cased to a dotted capital I).
     */
    protected static enum ValueManipulator {
        /** Identity mapping: returns the value unchanged. */
        NO_TRANSFORM {
            @Override
            public String manipulate(String value) {
                return value;
            }
        },

        /** Converts the value to lower case, independent of the default locale. */
        toLowerCase {
            @Override
            public String manipulate(String value) {
                return value.toLowerCase(java.util.Locale.ROOT);
            }
        },

        /** Converts the value to upper case, independent of the default locale. */
        toUpperCase {
            @Override
            public String manipulate(String value) {
                return value.toUpperCase(java.util.Locale.ROOT);
            }
        };

        /**
         * Looks up a value manipulator by the name given by the user. The name
         * must match a constant of this enum exactly (case-sensitive).
         * 
         * @param userStr name of the requested manipulator; may be null
         * @return the manipulator with that name, or {@link #NO_TRANSFORM}
         *         (the identity mapping) if the name is null or unknown
         */
        public static ValueManipulator initFromString(String userStr) {
            // valueOf would throw a NullPointerException for null; check instead
            if (userStr == null) {
                return NO_TRANSFORM;
            }
            try {
                return ValueManipulator.valueOf(userStr);
            } catch (IllegalArgumentException e) {
                // unknown manipulator name: fall back to the identity mapping
                return NO_TRANSFORM;
            }
        }

        /**
         * Applies this manipulation to the given value.
         * 
         * @param value the value to transform
         * @return the transformed value
         */
        public abstract String manipulate(String value);
    }
}