org.openanzo.glitter.query.Projection.java Source code

Java tutorial

Introduction

Here is the source code for org.openanzo.glitter.query.Projection.java

Source

/*******************************************************************************
 * Copyright (c) 2004, 2007 IBM Corporation and Cambridge Semantics Incorporated.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * File: $Source: /cvsroot/slrp/glitter/com.ibm.adtech.glitter/src/com/ibm/adtech/glitter/query/Projection.java,v $
 * Created by:  Lee Feigenbaum (<a href="mailto:feigenbl@us.ibm.com">feigenbl@us.ibm.com</a>)
 * Created on: 10/23/06
 * Revision: $Id: Projection.java 164 2007-07-31 14:11:09Z mroy $
 *
 * Contributors: IBM Corporation - initial API and implementation
 *     Cambridge Semantics Incorporated - Fork to Anzo
 *******************************************************************************/
package org.openanzo.glitter.query;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import org.apache.commons.collections15.CollectionUtils;
import org.apache.commons.collections15.Transformer;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.openanzo.analysis.RequestAnalysis;
import org.openanzo.exceptions.ExceptionConstants;
import org.openanzo.exceptions.LogUtils;
import org.openanzo.glitter.exception.ExpressionEvaluationException;
import org.openanzo.glitter.exception.GlitterRuntimeException;
import org.openanzo.glitter.exception.UnnamedProjectionException;
import org.openanzo.glitter.expression.AggregateFunction;
import org.openanzo.glitter.query.QueryController.QueryStringPrintOptions;
import org.openanzo.glitter.syntax.abstrakt.Expression;
import org.openanzo.glitter.syntax.abstrakt.FunctionCall;
import org.openanzo.glitter.syntax.abstrakt.SimpleExpression;
import org.openanzo.rdf.URI;
import org.openanzo.rdf.Value;
import org.openanzo.rdf.Variable;
import org.openanzo.rdf.utils.PrettyPrinter;

/**
 * {@link Projection} handles the <tt>SELECT</tt> SPARQL query form, in which the result set is returned after being projected out to only the selected
 * variables.
 * 
 * 
 * @author lee <lee@cambridgesemantics.com>
 * 
 */
public class Projection implements QueryResultForm {
    //static private Variable countVariable = MemVariable.createVariable("count");

    // these two lists are parallel. That is, projectedExpression[i] gets projected as projectedAs[i]
    // The constructor stores the caller's list directly (no defensive copy).
    private final List<Expression> projectedExpressions;

    // Output (column) names; initialize() rejects a missing or null name.
    private final List<Variable> projectedAs;

    // GROUP BY variables; never null (the constructor substitutes an empty list for null).
    private final List<Variable> groupByVars;

    // Maps each output name in projectedAs to the expression it is projected from; filled by initialize().
    // GroupKey consults it so that grouping by an alias evaluates the aliased expression.
    private final Map<Variable, Expression> aliasMap;

    // True for SELECT *; only consulted when printing the projection.
    private final boolean isSelectStar;

    // True for SELECT DISTINCT; duplicates are removed in refineSolutionsAfterOrdering().
    private final boolean distinct;

    // True for SELECT REDUCED; only reported by isReduced() and the printing methods in this class.
    private final boolean reduced;

    // Set by initialize() when there are GROUP BY variables or an aggregate function call is projected.
    private boolean isAggregate;

    /**
     * Creates a <tt>SELECT</tt> projection. The lists passed in are stored as-is (they are not copied), so the caller hands ownership of them to this
     * object.
     * 
     * @param expressions
     *            The expressions being selected; should be filled in even if isSelectStar is true
     * @param variables
     *            The output names of the selected expressions; parallel to <tt>expressions</tt>
     * @param groupByVars
     *            The <tt>GROUP BY</tt> variables; <tt>null</tt> is treated as an empty list. When non-empty, one solution is produced per distinct
     *            combination of their values. When empty but an aggregate function call is projected, all solutions form a single group.
     * @param isSelectStar
     *            If <tt>true</tt>, this is a <tt>SELECT *</tt> projection; used for serialization.
     * @param distinct
     *            If <tt>true</tt>, return only distinct resultset rows
     * @param reduced
     *            If <tt>true</tt> (and <tt>distinct</tt> is <tt>false</tt>), the implementation may return anywhere between 1 and the standard
     *            (algebra-defined) cardinality of each solution.
     * @throws UnnamedProjectionException
     *             if a projected expression has no output name, or its output name is <tt>null</tt>
     */
    public Projection(List<Expression> expressions, List<Variable> variables, List<Variable> groupByVars,
            boolean isSelectStar, boolean distinct, boolean reduced) throws UnnamedProjectionException {
        this.isSelectStar = isSelectStar;
        this.distinct = distinct;
        this.reduced = reduced;
        this.projectedExpressions = expressions;
        this.projectedAs = variables;
        if (groupByVars == null) {
            this.groupByVars = new ArrayList<Variable>();
        } else {
            this.groupByVars = groupByVars;
        }
        this.aliasMap = new HashMap<Variable, Expression>();
        initialize();
    }

    /**
     * Finishes construction: decides whether this is an aggregate projection, validates that every projected expression has a non-null output name,
     * and fills the alias map (output name to expression).
     * 
     * @throws UnnamedProjectionException
     *             if there are fewer names than expressions, or one of the names is <tt>null</tt>
     */
    private void initialize() throws UnnamedProjectionException {
        // GROUP BY variables always imply aggregation; otherwise look for an aggregate function call,
        // which means all solutions are grouped into one big group
        boolean aggregateFound = !this.groupByVars.isEmpty();
        if (!aggregateFound) {
            // walk the projected expressions, descending into the arguments of function calls
            LinkedList<Expression> pending = new LinkedList<Expression>(this.projectedExpressions);
            while (!aggregateFound && !pending.isEmpty()) {
                Expression candidate = pending.removeFirst();
                if (!(candidate instanceof FunctionCall)) {
                    continue;
                }
                FunctionCall call = (FunctionCall) candidate;
                if (call.getFunction() instanceof AggregateFunction) {
                    aggregateFound = true;
                } else {
                    pending.addAll(call.getArguments());
                }
            }
        }
        if (aggregateFound) {
            this.isAggregate = true;
        }

        // every projected expression needs exactly one non-null output name
        int nameCount = this.projectedAs.size();
        int expressionCount = this.projectedExpressions.size();
        if (nameCount < expressionCount) {
            // report the first expression that has no name at all
            throw new UnnamedProjectionException(this.projectedExpressions.get(nameCount));
        }
        if (nameCount > expressionCount) {
            throw new GlitterRuntimeException(ExceptionConstants.GLITTER.MORE_NAMES);
        }
        int unnamedIndex = this.projectedAs.indexOf(null);
        if (unnamedIndex != -1) {
            throw new UnnamedProjectionException(this.projectedExpressions.get(unnamedIndex));
        }
        for (int idx = 0; idx < nameCount; idx++) {
            this.aliasMap.put(this.projectedAs.get(idx), this.projectedExpressions.get(idx));
        }
    }

    /**
     * Reports whether solutions are grouped before being projected.
     * 
     * @return <tt>true</tt> if this projection has <tt>GROUP BY</tt> variables or projects an aggregate function call
     */
    public boolean isAggregateProjection() {
        return isAggregate;
    }

    /**
     * Reports whether this is a <tt>DISTINCT</tt> query.
     * 
     * @return <tt>true</tt> if duplicate result rows are eliminated
     */
    public boolean isDistinct() {
        return distinct;
    }

    /**
     * Reports whether this is a <tt>REDUCED</tt> query.
     * 
     * @return <tt>true</tt> if the projection was created with the <tt>reduced</tt> flag set
     */
    public boolean isReduced() {
        return reduced;
    }

    /**
     * Returns the result columns of this projection.
     * 
     * @return A fresh copy of the list of output variables; modifying it does not affect this projection.
     */
    public List<Variable> getResultVariables() {
        List<Variable> columns = new ArrayList<Variable>(projectedAs);
        return columns;
    }

    /**
     * Returns the expressions projected from this query.
     * 
     * @return A fresh copy of the list of projected expressions; modifying it does not affect this projection.
     */
    public List<Expression> getProjectedExpressions() {
        List<Expression> selected = new ArrayList<Expression>(projectedExpressions);
        return selected;
    }

    /**
     * Collects the variables referenced by any of the projected expressions.
     * 
     * @return A newly built set holding the union of every projected expression's referenced variables.
     */
    public Set<Variable> getReferencedVariables() {
        // @@ this could be memo'ized if helpful
        HashSet<Variable> referenced = new HashSet<Variable>();
        for (int idx = 0; idx < this.projectedExpressions.size(); idx++) {
            referenced.addAll(this.projectedExpressions.get(idx).getReferencedVariables());
        }
        return referenced;
    }

    /**
     * A projection reports no bindable variables.
     * 
     * @return Always an empty collection.
     */
    public Collection<Variable> getBindableVariables() {
        return Collections.<Variable> emptyList();
    }

    /**
     * Collects the URIs referenced by any of the projected expressions.
     * 
     * @return A newly built set holding the union of every projected expression's referenced URIs.
     */
    public Collection<URI> getReferencedURIs() {
        HashSet<URI> referenced = new HashSet<URI>();
        for (int idx = 0; idx < this.projectedExpressions.size(); idx++) {
            referenced.addAll(this.projectedExpressions.get(idx).getReferencedURIs());
        }
        return referenced;
    }

    /**
     * Returns the <tt>GROUP BY</tt> variables of this projection.
     * 
     * @return A fresh copy of the list of group by variables (empty if there are none).
     */
    public List<Variable> getGroupByVariables() {
        List<Variable> grouping = new ArrayList<Variable>(groupByVars);
        return grouping;
    }

    /**
     * Serializes the refined results of a <tt>SELECT</tt> query; delegates to {@link #serializeResultsAsResultSet(SolutionSet)}.
     * 
     * @param results
     *            The refined {@link SolutionSet}
     * @return The value produced by {@link #serializeResultsAsResultSet(SolutionSet)}
     */
    public Object serializeResults(SolutionSet results) {
        SolutionSet serialized = serializeResultsAsResultSet(results);
        return serialized;
    }

    /**
     * Identity serialization: the result of a {@link Projection} is the solution set itself.
     * 
     * @param solutions
     *            The refined {@link SolutionSet}
     * @return The very same {@link SolutionSet} instance that was passed in
     */
    private SolutionSet serializeResultsAsResultSet(final SolutionSet solutions) {
        return solutions;
    }

    /**
     * Evaluates the projected expressions against the given solutions and returns the resulting solutions.
     * <p>
     * For an aggregate projection the solutions are first partitioned into groups (a single group when there are no <tt>GROUP BY</tt> variables); one
     * solution is produced per group, holding only the projected bindings. A solution whose group key cannot be evaluated is left out of every group,
     * and a projected expression that fails to evaluate for a group is simply left unbound.
     * <p>
     * For a non-aggregate projection each solution is copied and the projected bindings are added to the copy; a solution for which any projected
     * expression fails to evaluate is dropped.
     * 
     * @param results
     *            The solutions to project
     * @return A new list of solutions with the projected expressions evaluated
     */
    public SolutionSet refineSolutionsBeforeOrdering(SolutionSet results) {
        final boolean debugging = RequestAnalysis.getAnalysisLogger().isDebugEnabled();
        final long startedAt;
        if (debugging) {
            startedAt = System.currentTimeMillis();
            StringBuilder description = new StringBuilder();
            prettyPrint(description);
            RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER,
                    "[glitter_Projection_refiningSolutions] [{}] {}", description.toString(), results.size());
        } else {
            startedAt = 0;
        }

        SolutionList refined = new SolutionList();
        if (!isAggregateProjection()) {
            // Plain projection: evaluate every expression against each solution on its own
            // (a variable simply looks its value up in the environment)
            for (PatternSolution source : results) {
                PatternSolutionImpl row = new PatternSolutionImpl(source);
                boolean evaluatedCleanly = true;
                for (int col = 0; evaluatedCleanly && col < this.projectedExpressions.size(); col++) {
                    Expression expression = this.projectedExpressions.get(col);
                    Variable name = this.projectedAs.get(col);
                    Value computed = null;
                    try {
                        computed = expression.evaluate(source, null);
                    } catch (ExpressionEvaluationException failure) {
                        // one failing expression discards the whole solution
                        evaluatedCleanly = false;
                    }
                    if (evaluatedCleanly && computed != null) {
                        row.setBinding(name, computed);
                    }
                }
                if (evaluatedCleanly) {
                    refined.add(row);
                }
            }
        } else {
            // Partition the solutions into groups keyed by the GROUP BY values
            Map<GroupKey, SolutionSet> groups = new HashMap<GroupKey, SolutionSet>();
            if (this.groupByVars == null || this.groupByVars.isEmpty()) {
                // no GROUP BY: everything lands in one group
                groups.put(new GroupKey(), results);
            } else {
                for (PatternSolution member : results) {
                    GroupKey key;
                    try {
                        key = new GroupKey(member);
                    } catch (ExpressionEvaluationException failure) {
                        // the grouping value could not be computed; this solution joins no group
                        continue;
                    }
                    SolutionSet bucket = groups.get(key);
                    if (bucket == null) {
                        bucket = new SolutionList();
                        groups.put(key, bucket);
                    }
                    bucket.add(member);
                }
            }

            // Evaluate the aggregates and simple variables once per group
            for (Entry<GroupKey, SolutionSet> group : groups.entrySet()) {
                SolutionSet members = group.getValue();
                PatternSolutionImpl aggregated = group.getKey().aggregateSolution;
                // the representative has all the proper values for the GROUP BY variables
                PatternSolution representative = null;
                if (members.size() > 0) {
                    representative = members.get(0);
                }
                for (int col = 0; col < this.projectedExpressions.size(); col++) {
                    Expression expression = this.projectedExpressions.get(col);
                    Variable name = this.projectedAs.get(col);
                    Value computed;
                    try {
                        computed = expression.evaluate(representative, members);
                    } catch (ExpressionEvaluationException failure) {
                        // an expression that cannot be evaluated leaves its column unbound
                        computed = null;
                    }
                    if (computed != null) {
                        aggregated.setBinding(name, computed);
                    }
                }
                // @@ if we add a HAVING clause, this is where those expressions are checked to be true
                // before including this aggregate solution
                refined.add(aggregated);
            }
        }

        if (debugging) {
            RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER,
                    "[glitter_Projection_evaluatedAndProjectedSolutions] {}:{}", refined.size(),
                    System.currentTimeMillis() - startedAt);
            RequestAnalysis.getAnalysisLogger().debug(LogUtils.TIMING_MARKER,
                    "glitter_Projection_evaluatedAndProjectedSolutions,{},{}", System.currentTimeMillis() - startedAt,
                    refined.size());
        }
        return refined;
    }

    /**
     * Reduces every solution to just the projected variables and, for a <tt>DISTINCT</tt> projection, removes duplicate rows.
     * <p>
     * Every solution is projected explicitly. Comparing only the number of bindings against the number of projected variables is not enough to
     * decide that a solution is already projected: a solution in which a projected variable is unbound can carry the same number of bindings while
     * still holding a variable that was not selected.
     * 
     * @param solutions
     *            The (possibly ordered) solutions; their order is preserved by the projection step
     * @param sortedByConditions
     *            The conditions the solutions were sorted by, or <tt>null</tt> if they were not sorted; passed on to
     *            {@link #projectDistinctSolutions(SolutionSet, List, List)}
     * @return A new list holding the projected (and, if requested, distinct) solutions
     */
    public SolutionSet refineSolutionsAfterOrdering(SolutionSet solutions,
            List<OrderingCondition> sortedByConditions) {
        SolutionList projection = new SolutionList();
        for (PatternSolution solution : solutions) {
            PatternSolutionImpl projected = new PatternSolutionImpl();
            for (Variable v : this.projectedAs) {
                Value val = solution.getBinding(v);
                // unbound projected variables simply stay unbound in the result row
                if (val != null)
                    projected.setBinding(v, val);
            }
            projection.add(projected);
        }

        // the solutions as we were given them may already be ordered - we need to
        // preserve that order while eliminating duplicates
        if (this.distinct) {
            projection = projectDistinctSolutions(projection, this.projectedExpressions, sortedByConditions);
        }
        return projection;
    }

    /**
     * Removes duplicate solutions from an already projected solution set.
     * <p>
     * Three strategies are used:
     * <ul>
     * <li>If the solutions were sorted and the leading sort conditions cover every projected expression, equal solutions are adjacent and a single
     * pass suffices.</li>
     * <li>If the solutions were not sorted at all (<tt>sortedByConditions</tt> is <tt>null</tt>), they are sorted here with
     * {@link Arrays#sort(Object[], int, int)} and then de-duplicated in a single pass; the result is in that sorted order.</li>
     * <li>Otherwise the existing order must be kept, and duplicates are found by pairwise comparison; of several equal solutions the last one is
     * kept.</li>
     * </ul>
     * 
     * @param projection
     *            The projected solutions to de-duplicate
     * @param projectedExpressions
     *            The expressions that were projected; may be <tt>null</tt>
     * @param sortedByConditions
     *            The conditions the solutions are sorted by, or <tt>null</tt> if they are unsorted
     * @return distinct solutions list
     */
    public static SolutionList projectDistinctSolutions(SolutionSet projection,
            List<Expression> projectedExpressions, List<OrderingCondition> sortedByConditions) {
        SolutionList distinctProjection = new SolutionList();
        PatternSolution[] solutions = projection.toArray(new PatternSolution[0]);
        if (solutions.length == 1) {
            distinctProjection.add(solutions[0]);
            return distinctProjection;
        }

        boolean solutionsAreSorted = false;
        if (projectedExpressions != null && sortedByConditions != null) {
            // we only care that the solutions are sorted if the sorting conditions lead to a total order
            // on the projected solutions - this is the case if all the projected expressions are involved
            // in the sorting, and no other sorting condition pre-empts them
            List<Object> leadingConditions = new ArrayList<Object>();
            for (OrderingCondition oc : sortedByConditions) {
                Object condition = oc.getCondition();
                if (!projectedExpressions.contains(condition))
                    break;
                leadingConditions.add(condition);
            }
            // coverage (rather than a simple count) keeps a repeated sort condition from being
            // mistaken for a sort over a different projected expression
            solutionsAreSorted = leadingConditions.containsAll(projectedExpressions);
        }
        // If you sort the solutions then you can shortcut a lot of comparisons:
        // a.) you only compare up until the other solution compares greater than you
        // But we can only do this if we don't need to maintain the order of a pre-projection sort!
        if (!solutionsAreSorted && sortedByConditions == null) {
            Arrays.sort(solutions, 0, solutions.length);
            solutionsAreSorted = true;
        }

        if (solutionsAreSorted) {
            // equal solutions are adjacent: keep the first of each run
            PatternSolution current = null;
            for (int i = 0; i < solutions.length; i++) {
                if (current == null || !current.equals(solutions[i])) {
                    distinctProjection.add(solutions[i]);
                    current = solutions[i];
                }
            }
        } else {
            // this is the slow (n^2) way of finding dupes; it works on the array snapshot (still in the
            // original order here, since no sort happened) and drops a solution when an equal one follows
            for (int i = 0; i < solutions.length; i++) {
                boolean dupeLater = false;
                PatternSolution pi = solutions[i];
                for (int j = i + 1; j < solutions.length; j++) {
                    if (PatternSolutionImpl.containMatchingBindings(solutions[j], pi)) {
                        dupeLater = true;
                        break;
                    }
                }
                if (!dupeLater)
                    distinctProjection.add(pi);
            }
        }
        return distinctProjection;
    }

    /**
     * Renders the <tt>SELECT</tt> clause: the keyword, the optional <tt>DISTINCT</tt> / <tt>REDUCED</tt> modifiers, and then either <tt>*</tt> or
     * the projected columns. A column whose expression is just its own output variable is printed as the variable; anything else is printed as
     * <tt>(expression AS variable)</tt>.
     * 
     * @return The textual form of this projection
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SELECT");
        if (this.distinct) {
            text.append(" DISTINCT");
        }
        if (this.reduced) {
            text.append(" REDUCED");
        }
        if (this.isSelectStar) {
            return text.append(" *").toString();
        }
        int columns = this.projectedExpressions.size();
        for (int col = 0; col < columns; col++) {
            Expression expression = this.projectedExpressions.get(col);
            Variable name = this.projectedAs.get(col);
            if (!expressionWrapsVariable(expression, name)) {
                text.append(" (");
                text.append(expression);
                text.append(" AS ");
                text.append(name);
                text.append(")");
            } else {
                text.append(' ').append(String.valueOf(name));
            }
        }
        return text.toString();
    }

    /**
     * Tells whether an expression is nothing more than the given variable.
     * 
     * @param e
     *            The projected expression
     * @param v
     *            The variable the expression is projected as
     * @return <tt>true</tt> if <tt>e</tt> is a {@link SimpleExpression} whose term equals <tt>v</tt>
     */
    private boolean expressionWrapsVariable(Expression e, Variable v) {
        if (!(e instanceof SimpleExpression)) {
            return false;
        }
        SimpleExpression simple = (SimpleExpression) e;
        return simple.getTerm().equals(v);
    }

    /**
     * Appends a structural description of this projection to the buffer, in the form
     * <tt>Projection([DISTINCT, ][REDUCED, ]columns[, GroupBy(vars)])</tt>, where the columns are either <tt>*</tt> or a comma-separated list in
     * which a renamed or computed column appears as <tt>ProjectAs(expression, variable)</tt>.
     * 
     * @param buffer
     *            The buffer the description is appended to
     */
    public void prettyPrint(StringBuilder buffer) {
        buffer.append("Projection(");
        if (isDistinct()) {
            buffer.append("DISTINCT, ");
        }
        if (isReduced()) {
            buffer.append("REDUCED, ");
        }

        if (!this.isSelectStar) {
            String separator = "";
            for (int col = 0; col < this.projectedExpressions.size(); col++) {
                Expression expression = this.projectedExpressions.get(col);
                Variable name = this.projectedAs.get(col);
                buffer.append(separator);
                separator = ", ";
                if (!expressionWrapsVariable(expression, name)) {
                    buffer.append("ProjectAs(");
                    buffer.append(PrettyPrinter.print(expression));
                    buffer.append(", ");
                    buffer.append(PrettyPrinter.print(name));
                    buffer.append(")");
                } else {
                    buffer.append(PrettyPrinter.print(name));
                }
            }
        } else {
            buffer.append("*");
        }

        boolean hasGrouping = groupByVars != null && !groupByVars.isEmpty();
        if (hasGrouping) {
            // pretty-print each GROUP BY variable, then join them with commas
            Transformer<Variable, String> printer = new Transformer<Variable, String>() {
                public String transform(Variable input) {
                    return PrettyPrinter.print(input);
                }
            };
            buffer.append(", GroupBy(");
            buffer.append(StringUtils.join(CollectionUtils.collect(this.groupByVars, printer), ", "));
            buffer.append(")");
        }
        buffer.append(")");
    }

    /**
     * Appends the <tt>SELECT</tt> clause of the query to the given builder. Variables are printed through
     * {@link QueryController#printTriplePatternComponent}; a column that is not just its own output variable is printed as
     * <tt>(expression AS variable)</tt>.
     * 
     * @param printFlags
     *            Print options, handed on to the variable and expression printers
     * @param indentLevel
     *            Indentation level, handed on to the expression printer
     * @param uri2prefix
     *            URI-to-prefix map, handed on to the variable and expression printers
     * @param s
     *            The builder the clause is appended to
     */
    public void prettyPrintQueryPart(EnumSet<QueryStringPrintOptions> printFlags, int indentLevel,
            Map<String, String> uri2prefix, StringBuilder s) {
        s.append("SELECT");
        if (isDistinct()) {
            s.append(" DISTINCT");
        }
        if (isReduced()) {
            s.append(" REDUCED");
        }
        if (this.isSelectStar) {
            s.append(" *");
            return;
        }
        int columns = this.projectedExpressions.size();
        for (int col = 0; col < columns; col++) {
            Expression expression = this.projectedExpressions.get(col);
            Variable name = this.projectedAs.get(col);
            s.append(" ");
            if (!expressionWrapsVariable(expression, name)) {
                s.append("(");
                expression.prettyPrintQueryPart(printFlags, indentLevel, uri2prefix, s);
                s.append(" AS ");
                QueryController.printTriplePatternComponent(name, printFlags, uri2prefix, s);
                s.append(")");
            } else {
                QueryController.printTriplePatternComponent(name, printFlags, uri2prefix, s);
            }
        }
    }

    /**
     * Appends the <tt>GROUP BY</tt> clause of the query to the given builder; appends nothing when there are no <tt>GROUP BY</tt> variables.
     * 
     * @param printFlags
     *            Print options, handed on to the variable printer
     * @param indentLevel
     *            Indentation level; not used by this method
     * @param uri2prefix
     *            URI-to-prefix map, handed on to the variable printer
     * @param s
     *            The builder the clause is appended to
     */
    protected void prettyPrintGroupByQueryPart(EnumSet<QueryStringPrintOptions> printFlags, int indentLevel,
            Map<String, String> uri2prefix, StringBuilder s) {
        if (groupByVars == null || groupByVars.isEmpty()) {
            return;
        }
        s.append("GROUP BY");
        for (int idx = 0; idx < this.groupByVars.size(); idx++) {
            s.append(" ");
            QueryController.printTriplePatternComponent(this.groupByVars.get(idx), printFlags, uri2prefix, s);
        }
    }

    /**
     * Hash key identifying one group of solutions in an aggregate projection. Two keys are equal when the values of all <tt>GROUP BY</tt> variables
     * agree. The key also carries the single aggregated solution that is produced for its group.
     * <p>
     * The grouping values are computed once, in the constructor, and both {@link #hashCode()} and {@link #equals(Object)} are based on those same
     * values. This matters when grouping by an alias: the grouping value is then the result of evaluating the aliased expression, not a binding
     * that can be looked up in the solution.
     */
    private class GroupKey {
        // the values of the GROUP BY variables (in groupByVars order, null where unbound) that define the group;
        // null for the key that stands for "all solutions in one group"
        private final List<Value> groupValues;

        private final int hashCode;

        // each group has a single (aggregate/grouped) solution, which is maintained in the GroupKey
        private final PatternSolutionImpl aggregateSolution = new PatternSolutionImpl();

        /**
         * Creates the key for the single group that holds all solutions (no <tt>GROUP BY</tt> variables).
         */
        public GroupKey() {
            this.hashCode = 0;
            this.groupValues = null;
        }

        /**
         * Creates the key of the group the given solution belongs to. Any exception raised while evaluating an aliased grouping expression propagates
         * to the caller.
         * 
         * @param bindings
         *            The solution whose grouping values define the key
         */
        protected GroupKey(final PatternSolution bindings) {
            List<Value> values = new ArrayList<Value>(Projection.this.groupByVars.size());
            HashCodeBuilder builder = new HashCodeBuilder();

            for (Variable var : Projection.this.groupByVars) {
                // check if we're grouping by an alias, in which case we actually need to evaluate
                // the associated expression to find the grouping value here
                Expression e = Projection.this.aliasMap.get(var);
                Value val;
                if (e != null)
                    val = e.evaluate(bindings, null);
                else
                    val = bindings.getBinding(var);
                // unbound values are remembered (as null) for equals() but do not contribute to the hash
                values.add(val);
                if (val != null) {
                    builder.append(val.hashCode());
                }
            }

            this.groupValues = values;
            this.hashCode = builder.toHashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj)
                return true;
            if (!(obj instanceof GroupKey))
                return false;
            GroupKey other = (GroupKey) obj;

            // the "all solutions" key only equals another "all solutions" key
            if (this.groupValues == null || other.groupValues == null)
                return this.groupValues == other.groupValues;

            // element-wise comparison; two unbound (null) values in the same position count as equal
            return this.groupValues.equals(other.groupValues);
        }

        @Override
        public int hashCode() {
            return hashCode;
        }
    }
}