org.dkpro.core.io.rdf.internal.Rdf2Uima.java Source code

Java tutorial

Introduction

Here is the source code for org.dkpro.core.io.rdf.internal.Rdf2Uima.java

Source

/*
 * Licensed to the Technische Universität Darmstadt under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The Technische Universität Darmstadt
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.
 *  
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dkpro.core.io.rdf.internal;

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.collections4.iterators.IteratorIterable;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.ontology.OntResource;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.vocabulary.RDF;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;

/**
 * Converts an RDF model that uses the {@link RdfCas} vocabulary into UIMA views and feature
 * structures inside a {@link JCas}.
 * <p>
 * The conversion runs in several passes: views are created first, then one empty UIMA feature
 * structure is created per RDF feature structure, then the feature values are filled in, and
 * finally the feature structures are added to the indexes of the views they are declared to be
 * indexed in.
 */
public class Rdf2Uima {
    /**
     * Converts all views and feature structures found in the model of the given statement into
     * the given JCas.
     *
     * @param aContext
     *            a statement whose model contains the RDF data to convert. Only the model of the
     *            statement is used.
     * @param aJCas
     *            the JCas to populate.
     * @throws CASException
     *             if a view cannot be obtained or created.
     * @throws IllegalStateException
     *             if a feature structure is declared as indexed in a resource that is not one of
     *             the converted views.
     */
    public static void convert(Statement aContext, JCas aJCas) throws CASException {
        Model m = aContext.getModel();

        // Set up names
        Resource tView = m.createResource(RdfCas.TYPE_VIEW);
        Resource tFeatureStructure = m.createResource(RdfCas.TYPE_FEATURE_STRUCTURE);
        Property pIndexedIn = m.createProperty(RdfCas.PROP_INDEXED_IN);

        Map<Resource, FeatureStructure> fsIndex = new HashMap<>();

        // Convert the views/SofAs. The sofa of each view is registered in the FS index as well so
        // that features pointing to a view resource can be resolved.
        Map<Resource, JCas> viewIndex = new HashMap<>();
        Iterator<Resource> viewIter = m.listSubjectsWithProperty(RDF.type, tView);
        for (Resource view : new IteratorIterable<Resource>(viewIter)) {
            JCas viewJCas = convertView(view, aJCas);
            viewIndex.put(view, viewJCas);
            fsIndex.put(view, viewJCas.getSofa());
        }

        // Create the FSes but without setting their feature values yet - the feature values
        // cannot be filled in right away because some of them may point to FSes not yet created
        List<Resource> fses = m.listSubjectsWithProperty(RDF.type, tFeatureStructure).toList();
        for (Resource fs : fses) {
            FeatureStructure uimaFS = initFS(fs.as(OntResource.class), aJCas);
            fsIndex.put(fs, uimaFS);
        }

        // Now fill the FSes with their feature values
        for (Resource fs : fses) {
            convertFS(fs.as(OntResource.class), aJCas, fsIndex);
        }

        // Finally add the FSes to the indexes of the respective views
        for (Resource fs : fses) {
            FeatureStructure uimaFS = fsIndex.get(fs);
            Iterator<Statement> indexedInIter = fs.listProperties(pIndexedIn);
            for (Statement indexedIn : new IteratorIterable<Statement>(indexedInIter)) {
                Resource viewResource = indexedIn.getResource();
                JCas viewJCas = viewIndex.get(viewResource);
                if (viewJCas == null) {
                    // Fail with context instead of a bare NullPointerException
                    throw new IllegalStateException("FS [" + fs + "] is indexed in unknown view ["
                            + viewResource + "]");
                }
                viewJCas.addFsToIndexes(uimaFS);
            }
        }
    }

    /**
     * Obtains (creating it if necessary) the view described by the given RDF resource and, if the
     * resource carries a sofa string, sets it as the sofa data of the view.
     *
     * @param aView
     *            the RDF resource describing the view. It must have a sofa ID property; the sofa
     *            string and sofa MIME type properties are optional.
     * @param aJCas
     *            the JCas in which the view is looked up or created.
     * @return the JCas for the view.
     * @throws CASException
     *             if the view cannot be obtained or created.
     * @throws IllegalStateException
     *             if the resource has no sofa ID property.
     */
    public static JCas convertView(Resource aView, JCas aJCas) throws CASException {
        Model m = aView.getModel();

        // Set up names
        Property pSofaID = m.createProperty(RdfCas.PROP_SOFA_ID);
        Property pSofaString = m.createProperty(RdfCas.PROP_SOFA_STRING);
        Property pSofaMimeType = m.createProperty(RdfCas.PROP_SOFA_MIME_TYPE);

        // The sofa ID is mandatory - without it there is no way to know which view is meant
        Statement sofaIdStmt = aView.getProperty(pSofaID);
        if (sofaIdStmt == null) {
            throw new IllegalStateException("View [" + aView + "] has no sofa ID property ["
                    + RdfCas.PROP_SOFA_ID + "]");
        }
        String viewName = sofaIdStmt.getString();

        // Instantiate the view/SofA
        JCas view = JCasUtil.getView(aJCas, viewName, true);

        // getProperty returns null for an absent property, so guard against views that carry no
        // sofa data or no MIME type instead of failing with a NullPointerException
        Statement sofaStringStmt = aView.getProperty(pSofaString);
        if (sofaStringStmt != null) {
            Statement sofaMimeTypeStmt = aView.getProperty(pSofaMimeType);
            String sofaMimeType = sofaMimeTypeStmt != null ? sofaMimeTypeStmt.getString() : null;
            view.setSofaDataString(sofaStringStmt.getString(), sofaMimeType);
        }

        return view;
    }

    /**
     * Creates the (still empty) UIMA feature structure for the given RDF feature structure.
     * <p>
     * The UIMA type is derived from the single direct RDF type of the resource that does not
     * belong to the {@link RdfCas#NS_RDFCAS} namespace. For {@link DocumentMetaData}, the
     * instance returned by {@link DocumentMetaData#get(JCas)} is used instead of creating a new
     * one.
     *
     * @param aFS
     *            the RDF resource describing the feature structure.
     * @param aJCas
     *            the JCas in which the feature structure is created.
     * @return the UIMA feature structure; its features are not yet set.
     * @throws IllegalStateException
     *             if the resource does not have exactly one candidate UIMA type.
     */
    public static FeatureStructure initFS(OntResource aFS, JCas aJCas) {
        CAS cas = aJCas.getCas();

        // Figure out the UIMA type - there can be only one type per FS. Anonymous types have no
        // URI and therefore cannot name a UIMA type, so they are dropped together with the types
        // from the RDFCAS namespace.
        Set<Resource> types = aFS.listRDFTypes(true).toSet();
        types.removeIf(res -> res.getURI() == null || res.getURI().startsWith(RdfCas.NS_RDFCAS));
        // Checked explicitly because "assert" is disabled unless the JVM runs with -ea
        if (types.size() != 1) {
            throw new IllegalStateException("Expected exactly one UIMA type for FS [" + aFS
                    + "] but found " + types.size() + ": " + types);
        }
        String typeUri = types.iterator().next().getURI();
        Type type = CasUtil.getType(cas, typeUri.substring(RdfCas.NS_UIMA.length()));

        FeatureStructure fs;
        if (type.getName().equals(DocumentMetaData.class.getName())) {
            // Special handling to avoid ending up with two document annotations in the CAS
            fs = DocumentMetaData.get(aJCas);
        } else {
            fs = cas.createFS(type);
        }

        return fs;
    }

    /**
     * Fills the features of the UIMA feature structure that corresponds to the given RDF feature
     * structure.
     * <p>
     * Only statements whose predicate URI starts with {@code uima:} are treated as features; the
     * feature base name is the part of the predicate URI after the last {@code -}. Primitive
     * features are set from the literal value of the statement, all other features are set to the
     * UIMA feature structure registered in the index for the object resource. The begin/end
     * features of the document annotation are left untouched.
     *
     * @param aFS
     *            the RDF resource describing the feature structure.
     * @param aJCas
     *            the JCas the feature structure belongs to.
     * @param aFsIndex
     *            maps RDF resources to the UIMA feature structures already created for them.
     * @return the UIMA feature structure with its features set.
     * @throws IllegalStateException
     *             if no UIMA feature structure is registered for {@code aFS} or for the target of
     *             a non-primitive feature.
     * @throws IllegalArgumentException
     *             if the UIMA type has no feature with the derived name or if a primitive feature
     *             has an unsupported range type.
     */
    public static FeatureStructure convertFS(OntResource aFS, JCas aJCas,
            Map<Resource, FeatureStructure> aFsIndex) {
        FeatureStructure fs = aFsIndex.get(aFS);
        if (fs == null) {
            throw new IllegalStateException("No UIMA FS found for [" + aFS + "]");
        }

        Iterator<Statement> stmtIter = aFS.listProperties();
        for (Statement stmt : new IteratorIterable<Statement>(stmtIter)) {
            String predicateUri = stmt.getPredicate().getURI();

            // Skip all non-features
            if (!predicateUri.startsWith("uima:")) {
                continue;
            }

            String featureName = StringUtils.substringAfterLast(predicateUri, "-");

            // Cannot update start/end of document annotation because that FS is already indexed,
            // so we skip those
            if (fs == aJCas.getDocumentAnnotationFs()
                    && (CAS.FEATURE_BASE_NAME_BEGIN.equals(featureName)
                            || CAS.FEATURE_BASE_NAME_END.equals(featureName))) {
                continue;
            }

            Feature uimaFeat = fs.getType().getFeatureByBaseName(featureName);
            if (uimaFeat == null) {
                // Fail with context instead of a NullPointerException further down
                throw new IllegalArgumentException("Type [" + fs.getType().getName()
                        + "] has no feature [" + featureName + "] (predicate [" + predicateUri
                        + "])");
            }

            if (uimaFeat.getRange().isPrimitive()) {
                switch (uimaFeat.getRange().getName()) {
                case CAS.TYPE_NAME_BOOLEAN:
                    fs.setBooleanValue(uimaFeat, stmt.getObject().asLiteral().getBoolean());
                    break;
                case CAS.TYPE_NAME_BYTE:
                    fs.setByteValue(uimaFeat, stmt.getObject().asLiteral().getByte());
                    break;
                case CAS.TYPE_NAME_DOUBLE:
                    fs.setDoubleValue(uimaFeat, stmt.getObject().asLiteral().getDouble());
                    break;
                case CAS.TYPE_NAME_FLOAT:
                    fs.setFloatValue(uimaFeat, stmt.getObject().asLiteral().getFloat());
                    break;
                case CAS.TYPE_NAME_INTEGER:
                    fs.setIntValue(uimaFeat, stmt.getObject().asLiteral().getInt());
                    break;
                case CAS.TYPE_NAME_LONG:
                    fs.setLongValue(uimaFeat, stmt.getObject().asLiteral().getLong());
                    break;
                case CAS.TYPE_NAME_SHORT:
                    fs.setShortValue(uimaFeat, stmt.getObject().asLiteral().getShort());
                    break;
                case CAS.TYPE_NAME_STRING:
                    fs.setStringValue(uimaFeat, stmt.getObject().asLiteral().getString());
                    break;
                default:
                    throw new IllegalArgumentException("Feature [" + uimaFeat.getName()
                            + "] has unsupported primitive type [" + uimaFeat.getRange().getName() + "]");
                }
            } else {
                // Non-primitive features reference another FS which must already be in the index
                Resource target = stmt.getObject().asResource();
                FeatureStructure targetUimaFS = aFsIndex.get(target);
                if (targetUimaFS == null) {
                    throw new IllegalStateException(
                            "No UIMA FS found for [" + target.getURI() + "]");
                }
                fs.setFeatureValue(uimaFeat, targetUimaFS);
            }
        }

        return fs;
    }
}