Java tutorial
/* * Copyright 2009-2011 Collaborative Research Centre SFB 632 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis.sqlgen; import annis.model.QueryAnnotation; import static annis.sqlgen.TableAccessStrategy.*; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.commons.lang3.StringUtils; import org.springframework.jdbc.core.JdbcTemplate; import annis.model.QueryNode; import annis.ql.parser.QueryData; import annis.service.objects.SubgraphFilter; import static annis.sqlgen.AbstractSqlGenerator.TABSTOP; import static annis.sqlgen.SqlConstraints.sqlString; import annis.sqlgen.extensions.AnnotateQueryData; import annis.sqlgen.extensions.LimitOffsetQueryData; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import static com.google.common.collect.Multimaps.index; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.LinkedHashSet; import java.util.LinkedList; import org.springframework.dao.DataAccessException; import org.springframework.jdbc.core.ResultSetExtractor; /** * * @param <T> Type into which the JDBC result set is transformed * * @author Thomas Krause <krauseto@hu-berlin.de> */ public class AnnotateSqlGenerator<T> extends AbstractSqlGenerator implements SelectClauseSqlGenerator<QueryData>, FromClauseSqlGenerator<QueryData>, WhereClauseSqlGenerator<QueryData>, OrderByClauseSqlGenerator<QueryData>, AnnotateExtractor<T>, SqlGeneratorAndExtractor<QueryData, T> { // include document name in SELECT clause private boolean includeDocumentNameInAnnotateQuery; // include is_token column in SELECT clause private boolean includeIsTokenColumn; private TableAccessStrategy outerQueryTableAccessStrategy; private ResultSetExtractor<T> resultExtractor; // helper to extract the corpus path from a JDBC result set private CorpusPathExtractor corpusPathExtractor; private String matchedNodesViewName; public AnnotateSqlGenerator() { } /** * Create a solution key to be used inside a single call to * {@code extractData}. * * This method must be overridden in child classes or by Spring. * @return */ protected SolutionKey<?> createSolutionKey() { throw new UnsupportedOperationException( "BUG: This method needs to be overwritten by ancestors or through Spring"); } public T queryAnnotationGraph(JdbcTemplate jdbcTemplate, long toplevelCorpusID, String documentName, List<String> nodeAnnotationFilter) { return (T) jdbcTemplate.query(getDocumentQuery(toplevelCorpusID, documentName, nodeAnnotationFilter), this); } public String getMatchedNodesViewName() { return matchedNodesViewName; } public void setMatchedNodesViewName(String matchedNodesViewName) { this.matchedNodesViewName = matchedNodesViewName; } protected void addSelectClauseAttribute(List<String> fields, String table, String column) { TableAccessStrategy tas = tables(null); fields.add(tas.aliasedColumn(table, column) + " AS \"" + outerQueryTableAccessStrategy.columnName(table, column) + "\""); } @Override public Set<String> whereConditions(QueryData queryData, List<QueryNode> alternative, String indent) { HashSet<String> result = new HashSet<>(); TableAccessStrategy tables = tables(null); List<AnnotateQueryData> annoExtList = queryData.getExtensions(AnnotateQueryData.class); if (!annoExtList.isEmpty()) { AnnotateQueryData annoExt = annoExtList.get(0); if (annoExt.getFilter() == SubgraphFilter.token) { result.add(tables.aliasedColumn(NODE_TABLE, "is_token") + " IS TRUE"); } } StringBuilder sb = new StringBuilder(); // restrict node table to corpus list List<Long> corpusList = queryData.getCorpusList(); if (corpusList != null && !corpusList.isEmpty()) { sb.append(indent); sb.append(tables.aliasedColumn(NODE_TABLE, "toplevel_corpus")); sb.append(" IN ("); sb.append(StringUtils.join(corpusList, ", ")); sb.append(") AND\n"); if (!tables.isMaterialized(RANK_TABLE, NODE_TABLE)) { sb.append(indent); sb.append(tables.aliasedColumn(RANK_TABLE, "toplevel_corpus")); sb.append(" IN ("); sb.append(StringUtils.join(corpusList, ", ")); sb.append(") AND\n"); } } String overlap = CommonAnnotateWithClauseGenerator.overlapForOneRange(indent + TABSTOP, "solutions.\"min\"", "solutions.\"max\"", "solutions.text", "solutions.corpus", tables); sb.append(overlap); sb.append("\n"); // corpus constriction sb.append(" AND\n"); sb.append(indent).append(TABSTOP); sb.append(tables.aliasedColumn(CORPUS_TABLE, "id")); sb.append(" = "); sb.append(tables.aliasedColumn(NODE_TABLE, "corpus_ref")); result.add(sb.toString()); return result; } @Override public String orderByClause(QueryData queryData, List<QueryNode> alternative, String indent) { StringBuilder sb = new StringBuilder(); sb.append("solutions.n, "); sb.append(tables(null).aliasedColumn(COMPONENT_TABLE, "name")).append(", "); sb.append(tables(null).aliasedColumn(COMPONENT_TABLE, "id")).append(", "); String preColumn = tables(null).aliasedColumn(RANK_TABLE, "pre"); sb.append(preColumn); String orderByClause = sb.toString(); return orderByClause; } @Override public String fromClause(QueryData queryData, List<QueryNode> alternative, String indent) { TableAccessStrategy tas = tables(null); List<Long> corpusList = queryData.getCorpusList(); StringBuilder sb = new StringBuilder(); sb.append(indent).append("solutions,\n"); sb.append(indent).append(TABSTOP); sb.append(AbstractFromClauseGenerator.tableAliasDefinition(tas, null, NODE_TABLE, 1, corpusList)); sb.append(",\n"); sb.append(indent).append(TABSTOP); sb.append(TableAccessStrategy.CORPUS_TABLE); return sb.toString(); } @Override public String selectClause(QueryData queryData, List<QueryNode> alternative, String indent) { String innerIndent = indent + TABSTOP; StringBuilder sb = new StringBuilder(); sb.append("DISTINCT\n"); sb.append(innerIndent).append("solutions.\"key\",\n"); sb.append(innerIndent); int matchStart = 0; List<LimitOffsetQueryData> extension = queryData.getExtensions(LimitOffsetQueryData.class); if (extension.size() > 0) { matchStart = extension.get(0).getOffset(); } sb.append(matchStart).append(" AS \"matchstart\",\n"); sb.append(innerIndent).append("solutions.n,\n"); List<String> fields = getSelectFields(); sb.append(innerIndent).append(StringUtils.join(fields, ",\n" + innerIndent)); sb.append(innerIndent).append(",\n"); // corpus.path_name sb.append(innerIndent).append("corpus.path_name AS path"); if (isIncludeDocumentNameInAnnotateQuery()) { sb.append(",\n"); sb.append(innerIndent).append("corpus.path_name[1] AS document_name"); } return sb.toString(); } protected List<String> getSelectFields() { List<String> fields = new ArrayList<>(); addSelectClauseAttribute(fields, NODE_TABLE, "id"); addSelectClauseAttribute(fields, NODE_TABLE, "text_ref"); addSelectClauseAttribute(fields, NODE_TABLE, "corpus_ref"); addSelectClauseAttribute(fields, NODE_TABLE, "toplevel_corpus"); addSelectClauseAttribute(fields, NODE_TABLE, "namespace"); addSelectClauseAttribute(fields, NODE_TABLE, "name"); addSelectClauseAttribute(fields, NODE_TABLE, "salt_id"); addSelectClauseAttribute(fields, NODE_TABLE, "left"); addSelectClauseAttribute(fields, NODE_TABLE, "right"); addSelectClauseAttribute(fields, NODE_TABLE, "token_index"); if (isIncludeIsTokenColumn()) { addSelectClauseAttribute(fields, NODE_TABLE, "is_token"); } addSelectClauseAttribute(fields, NODE_TABLE, "span"); addSelectClauseAttribute(fields, NODE_TABLE, "left_token"); addSelectClauseAttribute(fields, NODE_TABLE, "right_token"); addSelectClauseAttribute(fields, NODE_TABLE, "seg_name"); addSelectClauseAttribute(fields, NODE_TABLE, "seg_index"); addSelectClauseAttribute(fields, RANK_TABLE, "id"); addSelectClauseAttribute(fields, RANK_TABLE, "pre"); addSelectClauseAttribute(fields, RANK_TABLE, "post"); addSelectClauseAttribute(fields, RANK_TABLE, "parent"); addSelectClauseAttribute(fields, RANK_TABLE, "root"); addSelectClauseAttribute(fields, RANK_TABLE, "level"); addSelectClauseAttribute(fields, COMPONENT_TABLE, "id"); addSelectClauseAttribute(fields, COMPONENT_TABLE, "type"); addSelectClauseAttribute(fields, COMPONENT_TABLE, "name"); addSelectClauseAttribute(fields, COMPONENT_TABLE, "namespace"); fields.add("(splitanno(node_qannotext))[1] as node_annotation_namespace"); fields.add("(splitanno(node_qannotext))[2] as node_annotation_name"); fields.add("(splitanno(node_qannotext))[3] as node_annotation_value"); fields.add("(splitanno(edge_qannotext))[1] as edge_annotation_namespace"); fields.add("(splitanno(edge_qannotext))[2] as edge_annotation_name"); fields.add("(splitanno(edge_qannotext))[3] as edge_annotation_value"); return fields; } public String getDocumentQuery(long toplevelCorpusID, String documentName, List<String> nodeAnnotationFilter) { TableAccessStrategy tas = createTableAccessStrategy(); List<String> fields = getSelectFields(); boolean filter = false; Set<String> qualifiedNodeAnnos = new LinkedHashSet<>(); Set<String> unqualifiedNodeAnnos = new LinkedHashSet<>(); if (nodeAnnotationFilter != null) { Splitter namespaceSplitter = Splitter.on("::").trimResults().limit(2); filter = true; for (String anno : nodeAnnotationFilter) { List<String> splitted = namespaceSplitter.splitToList(anno); if (splitted.size() > 1) { qualifiedNodeAnnos.add(AnnotationConditionProvider.regexEscaper.escape(splitted.get(0)) + ":" + AnnotationConditionProvider.regexEscaper.escape(splitted.get(1))); } else { unqualifiedNodeAnnos.add(AnnotationConditionProvider.regexEscaper.escape(splitted.get(0))); } } } StringBuilder template = new StringBuilder(); template.append("SELECT DISTINCT \n" + "\tARRAY[-1::bigint] AS key, ARRAY[''::varchar] AS key_names, 0 as matchstart, " + StringUtils.join(fields, ", ") + ", " + "c.path_name as path, c.path_name[1] as document_name\n" + "FROM\n" + "\tfacts_:top AS facts,\n" + "\tcorpus as c, corpus as toplevel\n" + "WHERE\n" + "\ttoplevel.id = :top AND c.name = :document_name AND " + tas.aliasedColumn(NODE_TABLE, "corpus_ref") + " = c.id\n" + "\tAND toplevel.top_level IS TRUE\n" + "\tAND c.pre >= toplevel.pre AND c.post <= toplevel.post\n"); if (filter) { template.append("\tAND (is_token IS TRUE"); if (!qualifiedNodeAnnos.isEmpty()) { String orExpr = Joiner.on(")|(").join(qualifiedNodeAnnos); template.append(" OR node_qannotext ~ '(^((").append(orExpr).append(")):(.*)$)' "); } if (!unqualifiedNodeAnnos.isEmpty()) { String orExpr = Joiner.on(")|(").join(unqualifiedNodeAnnos); template.append(" OR node_annotext ~ '(^((").append(orExpr).append(")):(.*)$)' "); } template.append(")\n"); } template.append("ORDER BY ").append(tas.aliasedColumn(COMPONENT_TABLE, "name")).append(", ") .append(tas.aliasedColumn(COMPONENT_TABLE, "id")).append(", ") .append(tas.aliasedColumn(RANK_TABLE, "pre")); String sql = template.toString().replace(":top", "" + toplevelCorpusID).replace(":document_name", sqlString(documentName)); return sql; } @Override public T extractData(ResultSet resultSet) throws SQLException, DataAccessException { return resultExtractor.extractData(resultSet); } public TableAccessStrategy getOuterQueryTableAccessStrategy() { return outerQueryTableAccessStrategy; } public boolean isIncludeDocumentNameInAnnotateQuery() { return includeDocumentNameInAnnotateQuery; } public void setIncludeDocumentNameInAnnotateQuery(boolean includeDocumentNameInAnnotateQuery) { this.includeDocumentNameInAnnotateQuery = includeDocumentNameInAnnotateQuery; } public boolean isIncludeIsTokenColumn() { return includeIsTokenColumn; } public void setIncludeIsTokenColumn(boolean includeIsTokenColumn) { this.includeIsTokenColumn = includeIsTokenColumn; } public CorpusPathExtractor getCorpusPathExtractor() { return corpusPathExtractor; } public void setCorpusPathExtractor(CorpusPathExtractor corpusPathExtractor) { this.corpusPathExtractor = corpusPathExtractor; } @Override public void setOuterQueryTableAccessStrategy(TableAccessStrategy outerQueryTableAccessStrategy) { this.outerQueryTableAccessStrategy = outerQueryTableAccessStrategy; } public ResultSetExtractor<T> getResultExtractor() { return resultExtractor; } public void setResultExtractor(ResultSetExtractor<T> resultExtractor) { this.resultExtractor = resultExtractor; } }