org.aksw.simba.cetus.tools.ClassModelCreator.java Source code

Java tutorial

Introduction

Here is the source code for org.aksw.simba.cetus.tools.ClassModelCreator.java

Source

/**
 * This file is part of Cetus.
 *
 * Cetus is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cetus is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Cetus.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.aksw.simba.cetus.tools;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import org.aksw.gerbil.io.nif.AbstractNIFParser;
import org.aksw.simba.cetus.dolce.DolceClassHierarchyLoader;
import org.aksw.simba.cetus.yago.YagoBasedTypeSearcher;
import org.aksw.simba.cetus.yago.YagoClassHierarchyLoader;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hp.hpl.jena.rdf.model.InfModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.reasoner.rulesys.GenericRuleReasoner;
import com.hp.hpl.jena.reasoner.rulesys.Rule;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;

public class ClassModelCreator {

    private static final Logger LOGGER = LoggerFactory.getLogger(ClassModelCreator.class);

    private static final String TYPE_INFERENCE_RULES = "cetusTypeInferencerRules.txt";

    public static void main(String[] args) {
        Model classModel = ModelFactory.createDefaultModel();
        classModel.setNsPrefixes(YagoBasedTypeSearcher.getPrefixMapping());

        LOGGER.info("Loading YAGO classes...");
        // load YAGO class hierarchy
        if (!loadYagoClasses(classModel)) {
            return;
        }

        LOGGER.info("Loading DOLCE classes...");
        // load DOLCE ontology
        if (!loadDolceClasses(classModel)) {
            return;
        }

        LOGGER.info("Loading YAGO - DOLCE links...");
        // load links between YAGO and DOLCE
        if (!loadDolceLinks(classModel)) {
            return;
        }

        LOGGER.info("\"Manual\" SubClassInference ...");
        // load links between YAGO and DOLCE
        if (!inferClassRelations(classModel)) {
            return;
        }

        // LOGGER.info("Inferring sub classes...");
        // // infer needed class relations
        // if (!inferClassRelations_OLD(classModel)) {
        // return;
        // }
        LOGGER.info("Transforming model ...");
        classModel = escapeClassModel(classModel);

        FileOutputStream fout = null;
        try {
            fout = new FileOutputStream("cetus_data/classes.ttl");
            classModel.write(fout, "TTL");
        } catch (Exception e) {
            LOGGER.error("Error while trying to write the class model to file.", e);
        } finally {
            IOUtils.closeQuietly(fout);
        }
    }

    protected static boolean loadYagoClasses(Model classModel) {
        YagoClassHierarchyLoader loader = new YagoClassHierarchyLoader();
        try {
            // FIXME put the path into properties file
            loader.readClassHierarchy(new File("cetus_data/yagoTaxonomy.tsv"), classModel);
            return true;
        } catch (IOException e) {
            LOGGER.error("Couldn't load YAGO classes. Returning null.");
        }
        return false;
    }

    protected static boolean loadDolceClasses(Model classModel) {
        DolceClassHierarchyLoader loader = new DolceClassHierarchyLoader();
        try {
            // FIXME put the path into properties file
            loader.readClassHierarchy(new File("cetus_data/DOLCE"), classModel);
            return true;
        } catch (IOException e) {
            LOGGER.error("Couldn't load DOLCE classes. Returning null.");
        }
        return false;
    }

    protected static boolean loadDolceLinks(Model classModel) {
        FileInputStream is = null;
        try {
            // FIXME put the path into properties file
            is = new FileInputStream("cetus_data/DOLCE_YAGO_links.nt");
            classModel.read(is, null, "N3");
            return true;
        } catch (IOException e) {
            LOGGER.error("Couldn't load DOLCE YAGO links. Returning null.", e);
        } finally {
            IOUtils.closeQuietly(is);
        }
        return false;
    }

    @Deprecated
    public static boolean inferClassRelations_OLD(Model classModel) {
        InputStream is = AbstractNIFParser.class.getClassLoader().getResourceAsStream(TYPE_INFERENCE_RULES);
        List<String> lines;
        try {
            lines = IOUtils.readLines(is);
        } catch (IOException e) {
            LOGGER.error("Couldn't load type inferencer rules from resource \"" + TYPE_INFERENCE_RULES
                    + "\". Working on the standard model.", e);
            return false;
        }
        IOUtils.closeQuietly(is);
        StringBuilder sb = new StringBuilder();
        for (String line : lines) {
            sb.append(line);
        }

        Reasoner reasoner = new GenericRuleReasoner(Rule.parseRules(sb.toString()));
        InfModel infModel = ModelFactory.createInfModel(reasoner, classModel);
        classModel.add(infModel);
        return true;
    }

    public static boolean inferClassRelations(Model classModel) {
        Resource a, b;
        StmtIterator stmtIterator;
        Statement statement;

        // add class type
        stmtIterator = classModel.listStatements(null, RDFS.subClassOf, (RDFNode) null);
        while (stmtIterator.hasNext()) {
            statement = stmtIterator.next();
            a = statement.getSubject();
            b = statement.getObject().asResource();
            if (!classModel.contains(a, RDF.type, RDFS.Class)) {
                classModel.add(a, RDF.type, RDFS.Class);
            }
            if (!classModel.contains(b, RDF.type, RDFS.Class)) {
                classModel.add(b, RDF.type, RDFS.Class);
            }
        }

        // Add subclasses to equivalent classes
        stmtIterator = classModel.listStatements(null, OWL.equivalentClass, (RDFNode) null);
        List<Resource[]> pairs = new ArrayList<Resource[]>();
        while (stmtIterator.hasNext()) {
            statement = stmtIterator.next();
            a = statement.getSubject();
            b = statement.getObject().asResource();
            pairs.add(new Resource[] { a, b });
            addSubClassesOf(a, b, classModel);
            addSubClassesOf(b, a, classModel);
        }
        for (Resource[] pair : pairs) {
            if (!classModel.contains(pair[1], OWL.equivalentClass, pair[0])) {
                classModel.add(pair[1], OWL.equivalentClass, pair[0]);
            }
        }
        return true;
    }

    private static void addSubClassesOf(Resource clazz, Resource equivalentClass, Model classModel) {
        ResIterator iterator = classModel.listSubjectsWithProperty(RDFS.subClassOf, clazz);
        Set<Resource> subClasses = new HashSet<Resource>();
        while (iterator.hasNext()) {
            subClasses.add(iterator.next());
        }
        for (Resource subClass : subClasses) {
            if (!classModel.contains(subClass, RDFS.subClassOf, equivalentClass)) {
                classModel.add(subClass, RDFS.subClassOf, equivalentClass);
            }
        }
    }

    public static boolean inferSubClassRelations(Model classModel) {
        Set<Resource> subClasses = new HashSet<Resource>();
        ResIterator resIterator = classModel.listSubjectsWithProperty(RDFS.subClassOf);
        while (resIterator.hasNext()) {
            subClasses.add(resIterator.next());
        }
        LOGGER.info("Found " + subClasses.size() + " sub classes.");

        Queue<Resource> queue = new LinkedList<Resource>();
        int count = 0, count2 = 0;
        Resource resource, next;
        NodeIterator nodeIterator;
        Set<Resource> equalClasses = new HashSet<Resource>();
        Set<Resource> alreadySeen = new HashSet<Resource>();
        for (Resource subClass : subClasses) {
            equalClasses.clear();
            equalClasses.add(subClass);
            nodeIterator = classModel.listObjectsOfProperty(subClass, OWL.equivalentClass);
            while (nodeIterator.hasNext()) {
                equalClasses.add(nodeIterator.next().asResource());
            }
            resIterator = classModel.listSubjectsWithProperty(OWL.equivalentClass, subClass);
            while (resIterator.hasNext()) {
                equalClasses.add(resIterator.next());
            }
            for (Resource equalClass : equalClasses) {
                nodeIterator = classModel.listObjectsOfProperty(equalClass, RDFS.subClassOf);
                while (nodeIterator.hasNext()) {
                    queue.add(nodeIterator.next().asResource());
                }
            }
            alreadySeen.clear();
            while (!queue.isEmpty()) {
                resource = queue.poll();
                // mark this resource as super class of the sub class
                classModel.add(subClass, RDFS.subClassOf, resource);
                ++count2;
                if (!classModel.contains(resource, RDF.type, RDFS.Class)) {
                    classModel.add(resource, RDF.type, RDFS.Class);
                }

                nodeIterator = classModel.listObjectsOfProperty(resource, RDFS.subClassOf);
                while (nodeIterator.hasNext()) {
                    next = nodeIterator.next().asResource();
                    if (!alreadySeen.contains(next)) {
                        queue.add(next);
                        alreadySeen.add(next);
                    }
                }
            }
            ++count;
            if ((count % 100000) == 0) {
                LOGGER.info("processed " + count + " sub classes.");
            }
        }
        LOGGER.info("Added " + count2 + " properties.");
        return true;
    }

    public static Model escapeClassModel(Model classModel) {
        Model newClassModel = ModelFactory.createDefaultModel();
        newClassModel.setNsPrefixes(YagoBasedTypeSearcher.getPrefixMapping());
        StmtIterator stmtIterator = classModel.listStatements();
        Statement statement;
        String subjectUri, objectUri;
        Resource subject;
        RDFNode object;
        while (stmtIterator.hasNext()) {
            statement = (Statement) stmtIterator.next();
            subject = statement.getSubject();
            if (subject.getURI().startsWith("http://yago-knowledge")) {
                subjectUri = encodeUri(subject.getURI());
                subject = newClassModel.createResource(subjectUri);
            }
            object = statement.getObject();
            if (object.isResource()) {
                if (object.asResource().getURI().startsWith("http://yago-knowledge")) {
                    objectUri = encodeUri(object.asResource().getURI());
                    object = newClassModel.createResource(objectUri);
                }
            }
            newClassModel.add(subject, statement.getPredicate(), object);
        }
        return newClassModel;
    }

    public static String encodeUri(String uri) {
        StringBuilder builder = new StringBuilder();
        int start = uri.indexOf("://");
        if (start < 0) {
            return uri;
        }
        start += 3;
        builder.append(uri.substring(0, start));
        int end = uri.indexOf("/", start);
        try {
            while (end >= 0) {
                builder.append(URLEncoder.encode(uri.substring(start, end), "UTF-8"));
                builder.append('/');
                start = end + 1;
                end = uri.indexOf("/", start);
            }
            builder.append(URLEncoder.encode(uri.substring(start), "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            // shouldn't occur
            LOGGER.error("Unexpected exception.", e);
        }
        return builder.toString();
    }
}