org.deri.iris.queryrewriting.FORewriter.java Source code

Java tutorial

Introduction

Here is the source code for org.deri.iris.queryrewriting.FORewriter.java

Source

/*
 * Integrated Rule Inference System (IRIS):
 * An extensible rule inference system for datalog with extensions.
 * 
 * Copyright (C) 2009 ICT Institute - Dipartimento di Elettronica e Informazione (DEI), 
 * Politecnico di Milano, Via Ponzio 34/5, 20133 Milan, Italy.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
 * MA  02110-1301, USA.
 */
package org.deri.iris.queryrewriting;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.Logger;
import org.deri.iris.Reporter;
import org.deri.iris.api.basics.IAtom;
import org.deri.iris.api.basics.ILiteral;
import org.deri.iris.api.basics.IPosition;
import org.deri.iris.api.basics.IRule;
import org.deri.iris.api.terms.ITerm;
import org.deri.iris.api.terms.IVariable;
import org.deri.iris.basics.Position;
import org.deri.iris.factory.Factory;
import org.deri.iris.queryrewriting.configuration.NCCheck;
import org.deri.iris.queryrewriting.configuration.SubCheckStrategy;
import org.deri.iris.utils.TermMatchingAndSubstitution;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * A class to produce a conjunctive query rewriting of a datalog query w.r.t. a set of FO-reducible TGDs
 * @author Giorgio Orsi <orsi@elet.polimi.it> - Politecnico di Milano
 */
public class FORewriter implements QueryRewriter {

    private static final Logger LOGGER = Logger.getLogger(FORewriter.class);
    protected static final Reporter rep = Reporter.getInstance();;

    protected final IRule query;
    protected final List<IRule> mRules;
    protected final Set<IRule> cns;
    protected final SubCheckStrategy subchkStrategy;
    protected final NCCheck ncchkStrategy;
    protected final Map<Pair<IPosition, IPosition>, Set<List<IRule>>> deps;

    @Override
    public Set<IRule> call() throws Exception {
        return rewrite();
    }

    FORewriter(final IRule query, final List<IRule> rules, final Set<IRule> constraints,
            final Map<Pair<IPosition, IPosition>, Set<List<IRule>>> posDeps, final SubCheckStrategy subchkStrategy,
            final NCCheck ncchkStrategy) {

        this.query = query;
        mRules = rules;
        cns = constraints;
        deps = posDeps;
        this.subchkStrategy = subchkStrategy;
        this.ncchkStrategy = ncchkStrategy;

    }

    @Override
    public Set<IRule> rewrite() {

        // partial rewritings (will be returned)
        final QueryGraph partialRewritings = new QueryGraph();

        // rewritten queries (mark all those queries that have been already used in a rewriting step)
        final Set<IRule> exploredQueries = new HashSet<IRule>();

        final List<IRule> newQueries = Lists.newArrayList(RenamingUtils.canonicalRenaming(query, "U"));

        // The input query is always part of the rewriting
        partialRewritings.addRule(newQueries.iterator().next());

        // The temporary rewritings
        List<IRule> tempRew;
        do {

            tempRew = ImmutableList.copyOf(newQueries);
            newQueries.clear();

            // For each new query q
            for (final IRule qn : tempRew) {

                // avoid double exploration
                if (!exploredQueries.contains(qn)) {
                    exploredQueries.add(qn);

                    // For each TGD
                    for (IRule r : mRules) {

                        r = RenamingUtils.canonicalRenaming(r, "V");

                        IRule qPrime = Factory.BASIC.createRule(qn.getHead(), qn.getBody());

                        /*
                         * Check if the new query factorizes w.r.t the TGD r
                         */
                        if ((r.getExistentialVariables().size() > 0) && (qPrime.getBody().size() > 1) && qPrime
                                .getPredicates().contains(r.getHead().iterator().next().getAtom().getPredicate())) {
                            final Map<IVariable, ITerm> sbstMap = new HashMap<IVariable, ITerm>();
                            final long factorTime = System.currentTimeMillis();
                            if (factorisable(qPrime, r, sbstMap)) {
                                qPrime = RenamingUtils.canonicalRenaming(qPrime, "U");
                                exploredQueries.add(qPrime);
                            }
                            rep.addToValue(RewMetric.FACTOR_TIME, (System.currentTimeMillis() - factorTime));
                        }
                        /*
                         * Apply the TGD until it is possible
                         */

                        for (final ILiteral l : qPrime.getBody()) {
                            // get the atom a
                            final IAtom a = l.getAtom();

                            final Map<IVariable, ITerm> gamma = new HashMap<IVariable, ITerm>();
                            // Check if the rule is applicable to the atom a
                            if (RewritingUtils.isApplicable(r, qPrime, a, gamma)) {

                                // rewrite the atom a with the body of the rule sigma
                                final IRule qrew = RenamingUtils.canonicalRenaming(
                                        RewritingUtils.rewrite(qPrime, a, r.getBody(), gamma), "U");

                                if (!exploredQueries.contains(qrew)) {
                                    newQueries.add(qrew);
                                    partialRewritings.addRule(qn, qrew);

                                }

                            }
                        }
                    }
                }
            }
            LOGGER.trace("|" + partialRewritings.size() + " [" + newQueries.size() + "]");
            rep.addToValue(RewMetric.GENERATED_QUERIES, (long) newQueries.size());

            // Checking subsumption
            if (subchkStrategy.equals(SubCheckStrategy.INTRAREW)) {
                LOGGER.debug("Applying intra-rewriting subsumption check on " + tempRew.size() + " queries.");
                final long subCheckTime = System.currentTimeMillis();
                RewritingUtils.purgeSubsumed(partialRewritings);
                rep.addToValue(RewMetric.SUBCHECK_TIME, System.currentTimeMillis() - subCheckTime);
            }
        } while (!newQueries.isEmpty());

        // Cleaning auxiliary predicates
        LOGGER.debug("Cleaning auxiliary predicates from " + partialRewritings.getRules().size() + " queries.");
        final long auxCleaningTime = System.currentTimeMillis();
        CleaningUtils.cleanRewriting(partialRewritings, new String[] { "aux_" });
        rep.addToValue(RewMetric.AUX_CLEANING_TIME, System.currentTimeMillis() - auxCleaningTime);
        LOGGER.debug("done.");

        if (subchkStrategy.equals(SubCheckStrategy.INTRADEC)) {
            LOGGER.debug("Applying intra-decomposition subsumption check on " + partialRewritings.getRules().size()
                    + " queries.");
            final long subCheckTime = System.currentTimeMillis();
            RewritingUtils.purgeSubsumed(partialRewritings);
            rep.addToValue(RewMetric.SUBCHECK_TIME, System.currentTimeMillis() - subCheckTime);
            LOGGER.debug("done.");
        }

        rep.addToValue(RewMetric.EXPLORED_QUERIES, (long) exploredQueries.size());
        return (ImmutableSet.copyOf(partialRewritings.getRules()));
    }

    /**
     * Checks if the n atoms are sharing the same variables in all the existential positions
     * @param q a conjunctive query
     * @param r a TGD
     * @param a1 the first atom
     * @param a2 the second atom
     * @return true if they share the same variables in all the existential positions
     */
    protected boolean factorisable(IRule q, final IRule r, final Map<IVariable, ITerm> sbstMap) {

        rep.incrementValue(RewMetric.FACTOR_COUNT);

        if (q.getBody().size() > 1) {
            // Get the atoms in body(q) that unify with head(r).

            final IAtom rheadAtom = r.getHead().iterator().next().getAtom();
            final Set<IPosition> headExPos = r.getExistentialPositions();

            final Set<IAtom> potentialUnifiableAtoms = new LinkedHashSet<IAtom>();
            for (final ILiteral l : q.getBody()) {
                final IAtom qbodyAtom = l.getAtom();

                if (qbodyAtom.getPredicate().equals(rheadAtom.getPredicate())) {
                    potentialUnifiableAtoms.add(qbodyAtom);
                }
            }
            if (potentialUnifiableAtoms.size() < 2)
                return false;
            else {

                // compute the powerset of atoms that are potentially unifiable in the body in the query.
                final Set<Set<IAtom>> atomsPowSet = Sets.powerSet(potentialUnifiableAtoms);
                // sort the set by size
                final List<Set<IAtom>> sortedPowSet = Lists.newArrayList(atomsPowSet);
                Collections.sort(sortedPowSet, new SetSizeComparator());

                for (final Set<IAtom> candidateSet : sortedPowSet) {
                    // check that we have at least two atoms in the candidate set.
                    if (candidateSet.size() > 1) {
                        final Map<IVariable, ITerm> unifier = new HashMap<IVariable, ITerm>();
                        if (TermMatchingAndSubstitution.unify(candidateSet, unifier)) {
                            // the atoms have a unifier, check that there is a well-behaved existential variable

                            // get variables in existential positions
                            final Set<IVariable> variables = getVariablesInPositions(candidateSet, headExPos);
                            for (final IVariable var : variables) {
                                // check that the variable does not occur in non-existential positions
                                if (headExPos.containsAll(q.getPositions(var))
                                        && containedInAllAtoms(var, candidateSet)) {
                                    q = RewritingUtils.factoriseQuery(q, unifier);
                                    return true;
                                }
                            }

                        }
                    }
                }

                return false;
            }
        } else
            return false;
    }

    protected boolean containedInAllAtoms(final IVariable var, final Set<IAtom> candidateSet) {

        for (final IAtom atom : candidateSet) {
            if (!atom.getTuple().contains(var))
                return false;
        }
        return true;
    }

    protected Set<IVariable> getVariablesInPositions(final Set<IAtom> candidateSet,
            final Set<IPosition> positions) {
        final Set<IVariable> exPosVariables = new LinkedHashSet<IVariable>();
        for (final IAtom atom : candidateSet) {
            int pos = 0;
            for (final ITerm t : atom.getTuple()) {
                pos++;
                final IPosition curPos = new Position(atom.getPredicate().getPredicateSymbol(), pos);
                if ((t instanceof IVariable) && positions.contains(curPos)) {
                    exPosVariables.add((IVariable) t);
                }
            }
        }
        return exPosVariables;
    }
}