annis.sqlgen.AnnotateSqlGenerator.java Source code

Java tutorial

Introduction

Here is the source code for annis.sqlgen.AnnotateSqlGenerator.java

Source

/*
 * Copyright 2009-2011 Collaborative Research Centre SFB 632
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.sqlgen;

import annis.model.QueryAnnotation;
import static annis.sqlgen.TableAccessStrategy.*;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.springframework.jdbc.core.JdbcTemplate;

import annis.model.QueryNode;
import annis.ql.parser.QueryData;
import annis.service.objects.SubgraphFilter;
import static annis.sqlgen.AbstractSqlGenerator.TABSTOP;
import static annis.sqlgen.SqlConstraints.sqlString;
import annis.sqlgen.extensions.AnnotateQueryData;
import annis.sqlgen.extensions.LimitOffsetQueryData;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import static com.google.common.collect.Multimaps.index;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import org.springframework.dao.DataAccessException;
import org.springframework.jdbc.core.ResultSetExtractor;

/**
 *
 * @param <T> Type into which the JDBC result set is transformed
 *
 * @author Thomas Krause <krauseto@hu-berlin.de>
 */
public class AnnotateSqlGenerator<T> extends AbstractSqlGenerator implements SelectClauseSqlGenerator<QueryData>,
        FromClauseSqlGenerator<QueryData>, WhereClauseSqlGenerator<QueryData>, OrderByClauseSqlGenerator<QueryData>,
        AnnotateExtractor<T>, SqlGeneratorAndExtractor<QueryData, T> {

    // include document name in SELECT clause
    private boolean includeDocumentNameInAnnotateQuery;
    // include is_token column in SELECT clause
    private boolean includeIsTokenColumn;
    private TableAccessStrategy outerQueryTableAccessStrategy;
    private ResultSetExtractor<T> resultExtractor;
    // helper to extract the corpus path from a JDBC result set
    private CorpusPathExtractor corpusPathExtractor;
    private String matchedNodesViewName;

    public AnnotateSqlGenerator() {
    }

    /**
     * Create a solution key to be used inside a single call to
     * {@code extractData}.
     *
     * This method must be overridden in child classes or by Spring.
     * @return 
     */
    protected SolutionKey<?> createSolutionKey() {
        throw new UnsupportedOperationException(
                "BUG: This method needs to be overwritten by ancestors or through Spring");
    }

    public T queryAnnotationGraph(JdbcTemplate jdbcTemplate, long toplevelCorpusID, String documentName,
            List<String> nodeAnnotationFilter) {
        return (T) jdbcTemplate.query(getDocumentQuery(toplevelCorpusID, documentName, nodeAnnotationFilter), this);
    }

    public String getMatchedNodesViewName() {
        return matchedNodesViewName;
    }

    public void setMatchedNodesViewName(String matchedNodesViewName) {
        this.matchedNodesViewName = matchedNodesViewName;
    }

    protected void addSelectClauseAttribute(List<String> fields, String table, String column) {
        TableAccessStrategy tas = tables(null);
        fields.add(tas.aliasedColumn(table, column) + " AS \""
                + outerQueryTableAccessStrategy.columnName(table, column) + "\"");
    }

    @Override
    public Set<String> whereConditions(QueryData queryData, List<QueryNode> alternative, String indent) {

        HashSet<String> result = new HashSet<>();

        TableAccessStrategy tables = tables(null);

        List<AnnotateQueryData> annoExtList = queryData.getExtensions(AnnotateQueryData.class);
        if (!annoExtList.isEmpty()) {
            AnnotateQueryData annoExt = annoExtList.get(0);

            if (annoExt.getFilter() == SubgraphFilter.token) {
                result.add(tables.aliasedColumn(NODE_TABLE, "is_token") + " IS TRUE");
            }
        }

        StringBuilder sb = new StringBuilder();

        // restrict node table to corpus list
        List<Long> corpusList = queryData.getCorpusList();
        if (corpusList != null && !corpusList.isEmpty()) {
            sb.append(indent);
            sb.append(tables.aliasedColumn(NODE_TABLE, "toplevel_corpus"));
            sb.append(" IN (");
            sb.append(StringUtils.join(corpusList, ", "));
            sb.append(") AND\n");

            if (!tables.isMaterialized(RANK_TABLE, NODE_TABLE)) {
                sb.append(indent);
                sb.append(tables.aliasedColumn(RANK_TABLE, "toplevel_corpus"));
                sb.append(" IN (");
                sb.append(StringUtils.join(corpusList, ", "));
                sb.append(") AND\n");
            }
        }

        String overlap = CommonAnnotateWithClauseGenerator.overlapForOneRange(indent + TABSTOP, "solutions.\"min\"",
                "solutions.\"max\"", "solutions.text", "solutions.corpus", tables);
        sb.append(overlap);
        sb.append("\n");

        // corpus constriction
        sb.append(" AND\n");
        sb.append(indent).append(TABSTOP);
        sb.append(tables.aliasedColumn(CORPUS_TABLE, "id"));
        sb.append(" = ");
        sb.append(tables.aliasedColumn(NODE_TABLE, "corpus_ref"));

        result.add(sb.toString());

        return result;
    }

    @Override
    public String orderByClause(QueryData queryData, List<QueryNode> alternative, String indent) {
        StringBuilder sb = new StringBuilder();
        sb.append("solutions.n, ");
        sb.append(tables(null).aliasedColumn(COMPONENT_TABLE, "name")).append(", ");
        sb.append(tables(null).aliasedColumn(COMPONENT_TABLE, "id")).append(", ");
        String preColumn = tables(null).aliasedColumn(RANK_TABLE, "pre");
        sb.append(preColumn);
        String orderByClause = sb.toString();
        return orderByClause;
    }

    @Override
    public String fromClause(QueryData queryData, List<QueryNode> alternative, String indent) {
        TableAccessStrategy tas = tables(null);
        List<Long> corpusList = queryData.getCorpusList();
        StringBuilder sb = new StringBuilder();

        sb.append(indent).append("solutions,\n");

        sb.append(indent).append(TABSTOP);
        sb.append(AbstractFromClauseGenerator.tableAliasDefinition(tas, null, NODE_TABLE, 1, corpusList));
        sb.append(",\n");

        sb.append(indent).append(TABSTOP);
        sb.append(TableAccessStrategy.CORPUS_TABLE);

        return sb.toString();
    }

    @Override
    public String selectClause(QueryData queryData, List<QueryNode> alternative, String indent) {
        String innerIndent = indent + TABSTOP;
        StringBuilder sb = new StringBuilder();

        sb.append("DISTINCT\n");

        sb.append(innerIndent).append("solutions.\"key\",\n");
        sb.append(innerIndent);

        int matchStart = 0;
        List<LimitOffsetQueryData> extension = queryData.getExtensions(LimitOffsetQueryData.class);
        if (extension.size() > 0) {
            matchStart = extension.get(0).getOffset();
        }

        sb.append(matchStart).append(" AS \"matchstart\",\n");
        sb.append(innerIndent).append("solutions.n,\n");

        List<String> fields = getSelectFields();

        sb.append(innerIndent).append(StringUtils.join(fields, ",\n" + innerIndent));
        sb.append(innerIndent).append(",\n");

        // corpus.path_name
        sb.append(innerIndent).append("corpus.path_name AS path");

        if (isIncludeDocumentNameInAnnotateQuery()) {
            sb.append(",\n");
            sb.append(innerIndent).append("corpus.path_name[1] AS document_name");
        }
        return sb.toString();
    }

    protected List<String> getSelectFields() {
        List<String> fields = new ArrayList<>();

        addSelectClauseAttribute(fields, NODE_TABLE, "id");
        addSelectClauseAttribute(fields, NODE_TABLE, "text_ref");
        addSelectClauseAttribute(fields, NODE_TABLE, "corpus_ref");
        addSelectClauseAttribute(fields, NODE_TABLE, "toplevel_corpus");
        addSelectClauseAttribute(fields, NODE_TABLE, "namespace");
        addSelectClauseAttribute(fields, NODE_TABLE, "name");
        addSelectClauseAttribute(fields, NODE_TABLE, "salt_id");
        addSelectClauseAttribute(fields, NODE_TABLE, "left");
        addSelectClauseAttribute(fields, NODE_TABLE, "right");
        addSelectClauseAttribute(fields, NODE_TABLE, "token_index");
        if (isIncludeIsTokenColumn()) {
            addSelectClauseAttribute(fields, NODE_TABLE, "is_token");
        }
        addSelectClauseAttribute(fields, NODE_TABLE, "span");
        addSelectClauseAttribute(fields, NODE_TABLE, "left_token");
        addSelectClauseAttribute(fields, NODE_TABLE, "right_token");
        addSelectClauseAttribute(fields, NODE_TABLE, "seg_name");
        addSelectClauseAttribute(fields, NODE_TABLE, "seg_index");
        addSelectClauseAttribute(fields, RANK_TABLE, "id");
        addSelectClauseAttribute(fields, RANK_TABLE, "pre");
        addSelectClauseAttribute(fields, RANK_TABLE, "post");
        addSelectClauseAttribute(fields, RANK_TABLE, "parent");
        addSelectClauseAttribute(fields, RANK_TABLE, "root");
        addSelectClauseAttribute(fields, RANK_TABLE, "level");
        addSelectClauseAttribute(fields, COMPONENT_TABLE, "id");
        addSelectClauseAttribute(fields, COMPONENT_TABLE, "type");
        addSelectClauseAttribute(fields, COMPONENT_TABLE, "name");
        addSelectClauseAttribute(fields, COMPONENT_TABLE, "namespace");

        fields.add("(splitanno(node_qannotext))[1] as node_annotation_namespace");
        fields.add("(splitanno(node_qannotext))[2] as node_annotation_name");
        fields.add("(splitanno(node_qannotext))[3] as node_annotation_value");

        fields.add("(splitanno(edge_qannotext))[1] as edge_annotation_namespace");
        fields.add("(splitanno(edge_qannotext))[2] as edge_annotation_name");
        fields.add("(splitanno(edge_qannotext))[3] as edge_annotation_value");

        return fields;
    }

    public String getDocumentQuery(long toplevelCorpusID, String documentName, List<String> nodeAnnotationFilter) {
        TableAccessStrategy tas = createTableAccessStrategy();
        List<String> fields = getSelectFields();

        boolean filter = false;
        Set<String> qualifiedNodeAnnos = new LinkedHashSet<>();
        Set<String> unqualifiedNodeAnnos = new LinkedHashSet<>();
        if (nodeAnnotationFilter != null) {
            Splitter namespaceSplitter = Splitter.on("::").trimResults().limit(2);
            filter = true;
            for (String anno : nodeAnnotationFilter) {
                List<String> splitted = namespaceSplitter.splitToList(anno);
                if (splitted.size() > 1) {
                    qualifiedNodeAnnos.add(AnnotationConditionProvider.regexEscaper.escape(splitted.get(0)) + ":"
                            + AnnotationConditionProvider.regexEscaper.escape(splitted.get(1)));
                } else {
                    unqualifiedNodeAnnos.add(AnnotationConditionProvider.regexEscaper.escape(splitted.get(0)));
                }
            }
        }

        StringBuilder template = new StringBuilder();
        template.append("SELECT DISTINCT \n"
                + "\tARRAY[-1::bigint] AS key, ARRAY[''::varchar] AS key_names, 0 as matchstart, "
                + StringUtils.join(fields, ", ") + ", " + "c.path_name as path, c.path_name[1] as document_name\n"
                + "FROM\n" + "\tfacts_:top AS facts,\n" + "\tcorpus as c, corpus as toplevel\n" + "WHERE\n"
                + "\ttoplevel.id = :top AND c.name = :document_name AND "
                + tas.aliasedColumn(NODE_TABLE, "corpus_ref") + " = c.id\n" + "\tAND toplevel.top_level IS TRUE\n"
                + "\tAND c.pre >= toplevel.pre AND c.post <= toplevel.post\n");

        if (filter) {

            template.append("\tAND (is_token IS TRUE");

            if (!qualifiedNodeAnnos.isEmpty()) {
                String orExpr = Joiner.on(")|(").join(qualifiedNodeAnnos);
                template.append(" OR node_qannotext ~ '(^((").append(orExpr).append(")):(.*)$)' ");
            }
            if (!unqualifiedNodeAnnos.isEmpty()) {
                String orExpr = Joiner.on(")|(").join(unqualifiedNodeAnnos);
                template.append(" OR node_annotext ~ '(^((").append(orExpr).append(")):(.*)$)' ");
            }
            template.append(")\n");
        }

        template.append("ORDER BY ").append(tas.aliasedColumn(COMPONENT_TABLE, "name")).append(", ")
                .append(tas.aliasedColumn(COMPONENT_TABLE, "id")).append(", ")
                .append(tas.aliasedColumn(RANK_TABLE, "pre"));
        String sql = template.toString().replace(":top", "" + toplevelCorpusID).replace(":document_name",
                sqlString(documentName));
        return sql;

    }

    @Override
    public T extractData(ResultSet resultSet) throws SQLException, DataAccessException {
        return resultExtractor.extractData(resultSet);
    }

    public TableAccessStrategy getOuterQueryTableAccessStrategy() {
        return outerQueryTableAccessStrategy;
    }

    public boolean isIncludeDocumentNameInAnnotateQuery() {
        return includeDocumentNameInAnnotateQuery;
    }

    public void setIncludeDocumentNameInAnnotateQuery(boolean includeDocumentNameInAnnotateQuery) {
        this.includeDocumentNameInAnnotateQuery = includeDocumentNameInAnnotateQuery;
    }

    public boolean isIncludeIsTokenColumn() {
        return includeIsTokenColumn;
    }

    public void setIncludeIsTokenColumn(boolean includeIsTokenColumn) {
        this.includeIsTokenColumn = includeIsTokenColumn;
    }

    public CorpusPathExtractor getCorpusPathExtractor() {
        return corpusPathExtractor;
    }

    public void setCorpusPathExtractor(CorpusPathExtractor corpusPathExtractor) {
        this.corpusPathExtractor = corpusPathExtractor;
    }

    @Override
    public void setOuterQueryTableAccessStrategy(TableAccessStrategy outerQueryTableAccessStrategy) {
        this.outerQueryTableAccessStrategy = outerQueryTableAccessStrategy;
    }

    public ResultSetExtractor<T> getResultExtractor() {
        return resultExtractor;
    }

    public void setResultExtractor(ResultSetExtractor<T> resultExtractor) {
        this.resultExtractor = resultExtractor;
    }

}