org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.java Source code

Java tutorial

Introduction

Here is the source code for org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.java

Source

/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.utilities.examples;

import com.google.common.collect.*;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.dllearner.kb.sparql.SparqlEndpoint;
import org.dllearner.reasoning.SPARQLReasoner;
import org.dllearner.utilities.datastructures.SetManipulation;
import org.semanticweb.owlapi.model.*;
import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;

import java.util.*;
import java.util.Map.Entry;

import static org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.Strategy.*;

/**
 * 
 * Utility class for automatically retrieving negative examples from a 
 * SPARQL endpoint given a set of positive examples.
 * 
 * @author Jens Lehmann
 * @author Sebastian Hellmann
 *
 */
public class AutomaticNegativeExampleFinderSPARQL2 {

    // Logger for this class; it is looked up by the simple class name
    // (i.e. without the package prefix).
    private static final Logger logger = LoggerFactory
            .getLogger(AutomaticNegativeExampleFinderSPARQL2.class.getSimpleName());

    // Factory used to build the OWL individuals returned as negative examples and
    // the OWL classes that are removed from super class sets.
    private OWLDataFactory df = new OWLDataFactoryImpl();

    /**
     * The available ways of picking negative example candidates.
     */
    public enum Strategy {
        /** Candidates are taken from the super classes of the positive examples' types. */
        SUPERCLASS,
        /** Candidates are taken from the sibling classes of the positive examples' types. */
        SIBLING,
        /** Candidates are typed individuals fetched by a generic SPARQL query. */
        RANDOM
    }

    // for re-using existing queries
    private SPARQLReasoner sr;

    // Prefix that class identifiers must start with to pass filterByNamespace(...);
    // no filtering happens while this is null.
    // NOTE(review): nothing visible in this class ever assigns the field, so it appears
    // to stay null - confirm whether a setter is missing.
    private String namespace;
    // Creates the SPARQL query executions used by the RANDOM strategy.
    private QueryExecutionFactory qef;

    /**
     * Creates a finder that queries the given SPARQL endpoint over HTTP.
     * The endpoint's URL and default graph URIs are used to build the
     * query execution factory.
     *
     * @param se the SPARQL endpoint to query
     */
    public AutomaticNegativeExampleFinderSPARQL2(SparqlEndpoint se) {
        this(new QueryExecutionFactoryHttp(se.getURL().toString(), se.getDefaultGraphURIs()));
    }

    /**
     * Creates a finder on top of the given query execution factory by wrapping it
     * in a new {@link SPARQLReasoner}.
     *
     * @param qef the factory used to execute SPARQL queries
     */
    public AutomaticNegativeExampleFinderSPARQL2(QueryExecutionFactory qef) {
        this(new SPARQLReasoner(qef));
    }

    /**
     * Creates a finder that uses the given reasoner and the query execution
     * factory that reasoner exposes.
     *
     * @param reasoner the SPARQL reasoner to delegate to
     */
    public AutomaticNegativeExampleFinderSPARQL2(SPARQLReasoner reasoner) {
        qef = reasoner.getQueryExecutionFactory();
        sr = reasoner;
    }

    /**
     * Finds negative examples for a known class, applying the super class, sibling
     * and random strategies (in that order) with equal weight.
     *
     * @param classToDescribe the class the positive examples belong to
     * @param positiveExamples the positive examples
     * @param limit the maximum number of negative examples to return
     * @return the negative examples found
     */
    public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe,
            Set<OWLIndividual> positiveExamples, int limit) {
        List<Strategy> allStrategies = Arrays.asList(SUPERCLASS, SIBLING, RANDOM);
        return getNegativeExamples(classToDescribe, positiveExamples, allStrategies, limit);
    }

    /**
     * Finds negative examples for a known class using the given strategies, each of
     * which receives the same weight ({@code 1 / strategies.size()}). The strategies
     * are kept in the iteration order of the given collection.
     *
     * @param classToDescribe the class the positive examples belong to
     * @param positiveExamples the positive examples
     * @param strategies the strategies to apply
     * @param limit the maximum number of negative examples to return
     * @return the negative examples found
     */
    public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe,
            Set<OWLIndividual> positiveExamples, Collection<Strategy> strategies, int limit) {
        Map<Strategy, Double> equallyWeighted = Maps.newLinkedHashMap();
        final double share = 1d / strategies.size();
        strategies.forEach(s -> equallyWeighted.put(s, share));
        return getNegativeExamples(classToDescribe, positiveExamples, equallyWeighted, limit);
    }

    /**
     * Finds negative examples for a known class using weighted strategies.
     * The class to describe is used as the single type of all positive examples.
     * <p>
     * Individuals contained in {@code positiveExamples} are never returned: the
     * strategies only exclude instances of {@code classToDescribe}, so a positive
     * example that is not an instance of that class could otherwise show up as a
     * negative one.
     *
     * @param classToDescribe the class the positive examples belong to
     * @param positiveExamples the positive examples; may be {@code null}, in which
     *            case nothing is removed from the result
     * @param strategiesWithWeight the strategies to apply, mapped to the fraction of
     *            {@code maxNrOfReturnedInstances} each of them may contribute
     * @param maxNrOfReturnedInstances the maximum number of negative examples
     * @return the negative examples found
     */
    public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe,
            Set<OWLIndividual> positiveExamples, Map<Strategy, Double> strategiesWithWeight,
            int maxNrOfReturnedInstances) {
        //set class to describe as the type for each instance
        Multiset<OWLClass> types = HashMultiset.create();
        types.add(classToDescribe);

        SortedSet<OWLIndividual> negativeExamples = computeNegativeExamples(classToDescribe, types,
                strategiesWithWeight, maxNrOfReturnedInstances);
        if (positiveExamples != null) {
            // a positive example must never be reported as a negative one
            negativeExamples.removeAll(positiveExamples);
        }
        return negativeExamples;
    }

    /**
     * Finds negative examples when only the positive examples are known, applying
     * the super class, sibling and random strategies with equal weight.
     *
     * @param positiveExamples the positive examples
     * @param limit the maximum number of negative examples to return
     * @return the negative examples found
     */
    public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples, int limit) {
        List<Strategy> allStrategies = Arrays.asList(SUPERCLASS, SIBLING, RANDOM);
        return getNegativeExamples(positiveExamples, allStrategies, limit);
    }

    /**
     * Finds negative examples when only the positive examples are known, using the
     * given strategies with equal weight ({@code 1 / strategies.size()}).
     * <p>
     * The strategies are stored in an insertion-ordered map so that they are applied
     * in the iteration order of {@code strategies}. The order matters because later
     * strategies take the negative examples found by earlier ones into account; a
     * plain {@code HashMap} gives no ordering guarantee.
     *
     * @param positiveExamples the positive examples
     * @param strategies the strategies to apply, in the order they should run
     * @param limit the maximum number of negative examples to return
     * @return the negative examples found
     */
    public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples,
            Collection<Strategy> strategies, int limit) {
        Map<Strategy, Double> strategiesWithWeight = new LinkedHashMap<>();
        double weight = 1d / strategies.size();
        for (Strategy strategy : strategies) {
            strategiesWithWeight.put(strategy, weight);
        }
        return getNegativeExamples(positiveExamples, strategiesWithWeight, limit);
    }

    /**
     * Finds negative examples when only the positive examples are known, using
     * weighted strategies. The types of all positive examples are collected,
     * restricted to the configured namespace (if any) and reduced to the most
     * specific classes; the strategies then work on those types.
     * <p>
     * Individuals contained in {@code positiveExamples} are removed from the result.
     * Without that step a positive example could be returned as a negative one, e.g.
     * when it is only typed with a class that is a sibling or super class of another
     * positive example's type.
     *
     * @param positiveExamples the positive examples
     * @param strategiesWithWeight the strategies to apply, mapped to the fraction of
     *            {@code maxNrOfReturnedInstances} each of them may contribute
     * @param maxNrOfReturnedInstances the maximum number of negative examples
     * @return the negative examples found
     */
    public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples,
            Map<Strategy, Double> strategiesWithWeight, int maxNrOfReturnedInstances) {
        //get the types for each instance
        Multiset<OWLClass> types = HashMultiset.create();
        for (OWLIndividual ex : positiveExamples) {
            types.addAll(sr.getTypes(ex));
        }

        //remove types that do not have the given namespace
        types = filterByNamespace(types);

        //keep the most specific types
        keepMostSpecificClasses(types);

        SortedSet<OWLIndividual> negativeExamples = computeNegativeExamples(null, types, strategiesWithWeight,
                maxNrOfReturnedInstances);
        // a positive example must never be reported as a negative one
        negativeExamples.removeAll(positiveExamples);
        return negativeExamples;
    }

    /**
     * Applies the given strategies in the iteration order of the map and collects
     * the negative examples they produce.
     * <p>
     * The sibling and super class strategies may each contribute at most
     * {@code weight * maxNrOfReturnedInstances} (truncated) individuals. The random
     * strategy fills the result up to {@code maxNrOfReturnedInstances}.
     *
     * @param classToDescribe the class to describe, or {@code null} if unknown
     * @param positiveExamplesTypes the types of the positive examples with their frequency
     * @param strategiesWithWeight the strategies to apply together with their weight
     * @param maxNrOfReturnedInstances the overall maximum number of negative examples
     * @return the negative examples found
     * @throws UnsupportedOperationException if the random strategy is requested but
     *             neither a class to describe nor any positive example type is known
     */
    private SortedSet<OWLIndividual> computeNegativeExamples(OWLClass classToDescribe,
            Multiset<OWLClass> positiveExamplesTypes, Map<Strategy, Double> strategiesWithWeight,
            int maxNrOfReturnedInstances) {
        SortedSet<OWLIndividual> negativeExamples = new TreeSet<>();

        for (Entry<Strategy, Double> entry : strategiesWithWeight.entrySet()) {
            Strategy strategy = entry.getKey();
            double weight = entry.getValue();

            // the max number of instances returned by the current strategy
            int strategyLimit = (int) (weight * maxNrOfReturnedInstances);

            if (strategy == SIBLING) {//get sibling class based examples
                negativeExamples.addAll(negativeExamplesBySiblingClasses(positiveExamplesTypes, strategyLimit,
                        maxNrOfReturnedInstances));
            } else if (strategy == SUPERCLASS) {//get super class based examples
                negativeExamples.addAll(negativeExamplesBySuperClasses(positiveExamplesTypes, negativeExamples,
                        strategyLimit, maxNrOfReturnedInstances));
            } else if (strategy == RANDOM) {//get some random examples
                negativeExamples.addAll(negativeExamplesByRandomSelection(classToDescribe,
                        positiveExamplesTypes, negativeExamples, maxNrOfReturnedInstances));
            }
        }
        return negativeExamples;
    }

    /**
     * Fetches typed individuals that are neither instances of the class to describe
     * nor, if that class is unknown, of any positive example type, and returns as many
     * of them as still fit into the overall limit.
     *
     * @param classToDescribe the class to describe, or {@code null} if unknown
     * @param positiveExamplesTypes the types of the positive examples
     * @param negativeExamples the negative examples found so far; they are not returned again
     * @param totalCnt the overall maximum number of negative examples
     * @return the additional negative examples
     * @throws UnsupportedOperationException if there is neither a class to describe
     *             nor any positive example type to exclude
     */
    private SortedSet<OWLIndividual> negativeExamplesByRandomSelection(OWLClass classToDescribe,
            Multiset<OWLClass> positiveExamplesTypes, Set<OWLIndividual> negativeExamples, int totalCnt) {
        logger.info("Applying random strategy...");

        // declare the owl: prefix explicitly so the query does not rely on
        // endpoint-specific predefined prefixes
        StringBuilder query = new StringBuilder("PREFIX owl: <http://www.w3.org/2002/07/owl#> ")
                .append("SELECT DISTINCT ?s WHERE {?s a ?type. ?type a owl:Class .");
        if (classToDescribe != null) {
            query.append("FILTER NOT EXISTS{?s a <").append(classToDescribe.toStringID()).append("> }");
        } else if (!positiveExamplesTypes.isEmpty()) {
            // class unknown: exclude the instances of every type of the positive examples
            for (OWLClass type : positiveExamplesTypes.elementSet()) {
                query.append("FILTER NOT EXISTS{?s a <").append(type.toStringID()).append("> }");
            }
        } else {
            throw new UnsupportedOperationException(
                    "Currently it's not possible to get random examples for unknown class to describe.");
        }
        query.append("} LIMIT ").append(totalCnt);

        SortedSet<OWLIndividual> candidates = new TreeSet<>();
        try (QueryExecution qe = qef.createQueryExecution(query.toString())) {
            ResultSet rs = qe.execSelect();
            while (rs.hasNext()) {
                QuerySolution qs = rs.next();
                String uri = qs.getResource("s").getURI();
                // blank nodes have no URI and cannot be turned into named individuals
                if (uri != null) {
                    candidates.add(df.getOWLNamedIndividual(IRI.create(uri)));
                }
            }
        }
        candidates.removeAll(negativeExamples);

        // earlier strategies may already have used up (or exceeded) the total budget
        int remaining = Math.max(0, totalCnt - negativeExamples.size());
        SortedSet<OWLIndividual> selected = new TreeSet<>(
                new ArrayList<>(candidates).subList(0, Math.min(candidates.size(), remaining)));
        logger.info("Negative examples(" + selected.size() + "): " + selected);
        return selected;
    }

    /**
     * Collects negative examples from the super classes of the positive examples'
     * types. {@code owl:Thing}, {@code rdfs:Resource} and super classes outside the
     * configured namespace (if any) are ignored.
     * <p>
     * Each super class may contribute at most a share of {@code cnt} that is
     * proportional to the frequency of the type among the positive examples and split
     * evenly between the type's super classes.
     *
     * @param positiveExamplesTypes the types of the positive examples with their frequency
     * @param negativeExamples negative examples found so far; they are not returned again
     * @param cnt the maximum number of examples this strategy may return
     * @param totalCnt the limit passed to the reasoner when fetching individuals
     * @return the negative examples found by this strategy
     */
    private SortedSet<OWLIndividual> negativeExamplesBySuperClasses(Multiset<OWLClass> positiveExamplesTypes,
            Set<OWLIndividual> negativeExamples, int cnt, int totalCnt) {
        logger.info("Applying super class strategy...");
        SortedSet<OWLIndividual> negExamples = new TreeSet<>();
        //for each type of the positive examples
        for (OWLClass nc : positiveExamplesTypes.elementSet()) {
            int frequency = positiveExamplesTypes.count(nc);
            //get super classes
            Set<OWLClassExpression> superClasses = sr.getSuperClasses(nc);
            superClasses.remove(df.getOWLThing());
            superClasses.remove(df.getOWLClass(OWLRDFVocabulary.RDFS_RESOURCE.getIRI()));
            superClasses = filterByNamespace(superClasses);
            logger.info("Super classes: " + superClasses);

            int nrOfSuperClasses = superClasses.size();
            if (nrOfSuperClasses == 0) {
                // nothing to query; also avoids dividing by zero below
                continue;
            }

            int limit = (int) Math
                    .ceil(((double) frequency / positiveExamplesTypes.size()) / nrOfSuperClasses * cnt);
            //get instances for each super class
            for (OWLClassExpression superClass : superClasses) {
                SortedSet<OWLIndividual> individuals = sr.getIndividualsExcluding(superClass, nc, totalCnt);
                individuals.removeAll(negativeExamples);
                individuals.removeAll(negExamples);
                // stableShrink hands back the shrunk set, so its result must be used
                negExamples.addAll(SetManipulation.stableShrink(individuals, limit));
            }
        }
        negExamples = SetManipulation.stableShrink(negExamples, cnt);
        logger.info("Negative examples(" + negExamples.size() + "): " + negExamples);
        return negExamples;
    }

    /**
     * Collects negative examples from the sibling classes of the positive examples'
     * types. Sibling classes outside the configured namespace (if any) are ignored.
     * <p>
     * Each sibling class may contribute at most a share of {@code cnt} that is
     * proportional to the frequency of the type among the positive examples and split
     * evenly between the type's sibling classes.
     *
     * @param positiveExamplesTypes the types of the positive examples with their frequency
     * @param cnt the maximum number of examples this strategy may return
     * @param totalCnt the limit passed to the reasoner when fetching individuals
     * @return the negative examples found by this strategy
     */
    private SortedSet<OWLIndividual> negativeExamplesBySiblingClasses(Multiset<OWLClass> positiveExamplesTypes,
            int cnt, int totalCnt) {
        logger.info("Applying sibling classes strategy...");
        SortedSet<OWLIndividual> negExamples = new TreeSet<>();

        // for each type of the positive examples
        for (OWLClass nc : positiveExamplesTypes.elementSet()) {
            int frequency = positiveExamplesTypes.count(nc);

            // get sibling classes
            Set<OWLClass> siblingClasses = sr.getSiblingClasses(nc);
            siblingClasses = filterByNamespace(siblingClasses);
            logger.info("Sibling classes: " + siblingClasses);

            int nrOfSiblingClasses = siblingClasses.size();
            if (nrOfSiblingClasses == 0) {
                // nothing to query; also avoids dividing by zero below
                continue;
            }

            int limit = (int) Math
                    .ceil(((double) frequency / positiveExamplesTypes.size()) / nrOfSiblingClasses * cnt);

            // get instances for each sibling class
            for (OWLClass siblingClass : siblingClasses) {
                SortedSet<OWLIndividual> individuals = sr.getIndividualsExcluding(siblingClass, nc, totalCnt);
                individuals.removeAll(negExamples);
                // stableShrink hands back the shrunk set, so its result must be used
                negExamples.addAll(SetManipulation.stableShrink(individuals, limit));
            }
        }
        negExamples = SetManipulation.stableShrink(negExamples, cnt);
        logger.info("Negative examples(" + negExamples.size() + "): " + negExamples);
        return negExamples;
    }

    /**
     * Restricts the given classes to named classes whose identifier starts with the
     * configured namespace. The identifier is obtained via {@code toStringID()}, the
     * same way as in the {@code Multiset} variant of this method, instead of the
     * renderer-dependent {@code toString()}. Anonymous class expressions have no
     * identifier and are filtered out.
     *
     * @param classes the classes to filter
     * @return a filtered view of {@code classes}, or {@code classes} itself if no
     *         namespace is configured
     */
    private <T extends OWLClassExpression> Set<T> filterByNamespace(Set<T> classes) {
        if (namespace == null) {
            return classes;
        }
        return Sets.filter(classes,
                input -> !input.isAnonymous() && input.asOWLClass().toStringID().startsWith(namespace));
    }

    /**
     * Restricts the given classes to those whose identifier starts with the
     * configured namespace.
     *
     * @param classes the classes to filter
     * @return a filtered multiset, or {@code classes} itself if no namespace is configured
     */
    private Multiset<OWLClass> filterByNamespace(Multiset<OWLClass> classes) {
        if (namespace == null) {
            return classes;
        }
        return Multisets.filter(classes, cls -> cls.toStringID().startsWith(namespace));
    }

    /**
     * Removes, in place, every class that is reported to be a super class of another
     * class in the multiset. The check uses the reasoner's class hierarchy when one
     * is available and asks the reasoner directly otherwise.
     *
     * @param classes the classes to prune; all occurrences of a removed class are dropped
     */
    private void keepMostSpecificClasses(Multiset<OWLClass> classes) {
        // iterate over a snapshot so the multiset can be modified safely
        HashMultiset<OWLClass> snapshot = HashMultiset.create(classes);
        for (OWLClass candidate : snapshot.elementSet()) {
            for (OWLClass other : snapshot.elementSet()) {
                if (candidate.equals(other)) {
                    continue;
                }
                boolean candidateIsSuperClass = sr.getClassHierarchy() != null
                        ? sr.getClassHierarchy().isSubclassOf(other, candidate)
                        : sr.isSuperClassOf(candidate, other);
                if (candidateIsSuperClass) {
                    // one sub class is enough to discard the candidate
                    classes.remove(candidate, classes.count(candidate));
                    break;
                }
            }
        }
    }
}