Java tutorial
/* * Copyright 2009-2011 Collaborative Research Centre SFB 632 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis.administration; import annis.dao.autogenqueries.QueriesGenerator; import annis.examplequeries.ExampleQuery; import annis.exceptions.AnnisException; import annis.model.QueryNode; import annis.ql.parser.QueryData; import annis.security.UserConfig; import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.collect.Multimap; import com.google.common.collect.TreeMultimap; import com.google.common.io.Files; import java.io.*; import java.net.URI; import java.net.URISyntaxException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.sql.DataSource; import org.apache.commons.dbcp2.BasicDataSource; import org.apache.commons.dbcp2.DelegatingConnection; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.filefilter.DirectoryFileFilter; import org.apache.commons.io.filefilter.FileFileFilter; import org.apache.commons.lang3.StringUtils; import org.codehaus.jackson.map.AnnotationIntrospector; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.SerializationConfig; import org.codehaus.jackson.xc.JaxbAnnotationIntrospector; import org.postgresql.PGConnection; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.Resource; import org.springframework.dao.DataAccessException; import org.springframework.jdbc.core.ResultSetExtractor; import org.springframework.jdbc.core.RowMapper; import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; import org.springframework.jdbc.core.simple.ParameterizedSingleColumnRowMapper; import org.springframework.jdbc.datasource.DataSourceUtils; import org.springframework.transaction.annotation.Isolation; import org.springframework.transaction.annotation.Propagation; import org.springframework.transaction.annotation.Transactional; /** * */ public class AdministrationDao extends AbstractAdminstrationDao { private static final Logger log = LoggerFactory.getLogger(AdministrationDao.class); // if this is true, the staging area is not deleted private boolean temporaryStagingArea; private DeleteCorpusDao deleteCorpusDao; /** * Searches for textes which are empty or only contains whitespaces. If that * is the case the visualizer and no document visualizer are defined in the * corpus properties file a new file is created and stores a new config which * disables document browsing. * * * @param corpusID The id of the corpus which texts are analyzed. */ private void analyzeTextTable(String toplevelCorpusName) { List<String> rawTexts = getAnnisDao().getRawText(toplevelCorpusName); // pattern for checking the token layer final Pattern WHITESPACE_MATCHER = Pattern.compile("^\\s+$"); for (String s : rawTexts) { if (s != null && WHITESPACE_MATCHER.matcher(s).matches()) { // deactivate doc browsing if no document browser configuration is exists if (getAnnisDao().getDocBrowserConfiguration(toplevelCorpusName) == null) { // should exists anyway Properties corpusConf; try { corpusConf = getAnnisDao().getCorpusConfiguration(toplevelCorpusName); } catch (FileNotFoundException ex) { log.error("not found a corpus configuration, so skip analyzing the text table", ex); return; } // disable document browsing if it is not explicit switch on by the // user in the corpus.properties boolean hasKey = corpusConf.containsKey("browse-documents"); boolean isActive = Boolean.parseBoolean(corpusConf.getProperty("browse-documents")); if (!(hasKey && isActive)) { log.info("disable document browser"); corpusConf.put("browse-documents", "false"); getAnnisDao().setCorpusConfiguration(toplevelCorpusName, corpusConf); } // once disabled don't search in further texts return; } } } } public ImportStatus initImportStatus() { return new CorpusAdministration.ImportStatsImpl(); } public enum EXAMPLE_QUERIES_CONFIG { IF_MISSING, TRUE, FALSE } /** * If this is true and no example_queries.tab is found, automatic queries are * generated. */ private EXAMPLE_QUERIES_CONFIG generateExampleQueries; private String schemaVersion; /** * A mapping for file-endings to mime types. */ private Map<String, String> mimeTypeMapping; private Map<String, String> tableInsertSelect; private Map<String, String> tableInsertFrom; // all files have to carry this suffix. private String annisFileSuffix = ".annis"; /** * Optional tab for example queries. If this tab not exist, a dummy file from * the resource folder is used. */ private static final String EXAMPLE_QUERIES_TAB = "example_queries"; /** * The name of the file and the relation containing the resolver information. */ private static final String FILE_RESOLVER_VIS_MAP = "resolver_vis_map"; // tables imported from bulk files // DO NOT CHANGE THE ORDER OF THIS LIST! Doing so may cause foreign key failures during import. private final String[] importedTables = { "corpus", "corpus_annotation", "text", "node", "node_annotation", "component", "rank", "edge_annotation", FILE_RESOLVER_VIS_MAP, EXAMPLE_QUERIES_TAB }; private final String[] tablesToCopyManually = { "corpus", "corpus_annotation", "text", FILE_RESOLVER_VIS_MAP, EXAMPLE_QUERIES_TAB, "corpus_stats", "media_files" }; // tables created during import private final String[] createdTables = { "corpus_stats", "media_files", "nodeidmapping" }; private final ObjectMapper jsonMapper = new ObjectMapper(); private QueriesGenerator queriesGenerator; /** * Called when Spring configuration finished */ public void init() { AnnotationIntrospector introspector = new JaxbAnnotationIntrospector(); jsonMapper.setAnnotationIntrospector(introspector); // the json should be as compact as possible in the database jsonMapper.configure(SerializationConfig.Feature.INDENT_OUTPUT, false); } ///// Subtasks of creating the database protected void dropDatabase(String database) { log.debug("dropping possible existing database"); closeAllConnections(database); getJdbcTemplate().execute("DROP DATABASE IF EXISTS " + database); } protected void dropUser(String username) { log.debug("dropping possible existing user"); getJdbcTemplate().execute("DROP USER IF EXISTS " + username); } protected void createUser(String username, String password) { log.info("creating user: " + username); getJdbcTemplate().execute("CREATE USER " + username + " PASSWORD '" + password + "'"); } protected void createDatabase(String database, String owner) { log.info("creating database: " + database + " OWNER " + owner + " ENCODING = 'UTF8'"); getJdbcTemplate().execute("CREATE DATABASE " + database + " OWNER " + owner + " ENCODING = 'UTF8'"); } protected void installPlPgSql() { log.info("installing stored procedure language plpgsql"); try { getJdbcTemplate().execute("CREATE LANGUAGE plpgsql"); } catch (Exception ex) { log.warn("plpqsql was already installed: " + ex.getMessage()); } } protected void createFunctionUniqueToplevelCorpusName() { log.info("creating trigger function: unique_toplevel_corpus_name"); executeSqlFromScript("unique_toplevel_corpus_name.sql"); } protected void createSchema() { log.info("creating ANNIS database schema (" + getSchemaVersion() + ")"); executeSqlFromScript("schema.sql"); // update schema version getJdbcTemplate().execute("DELETE FROM repository_metadata WHERE \"name\"='schema-version'"); getJdbcTemplate().execute("INSERT INTO repository_metadata " + "VALUES ('schema-version', '" + StringUtils.replace(getSchemaVersion(), "'", "''") + "');"); } protected void createSchemaIndexes() { log.info("creating ANNIS database schema indexes (" + getDatabaseSchemaVersion() + ")"); executeSqlFromScript("schemaindex.sql"); } protected void populateSchema() { log.info("populating the schemas with default values"); bulkloadTableFromResource("resolver_vis_map", new FileSystemResource(new File(getScriptPath(), FILE_RESOLVER_VIS_MAP + annisFileSuffix))); // update the sequence executeSqlFromScript("update_resolver_sequence.sql"); log.info("creating immutable functions for extracting annotations"); executeSqlFromScript("functions_get.sql"); } /** * Get the real schema name and version as used by the database. * * @return */ @Transactional(readOnly = true, propagation = Propagation.REQUIRED) public String getDatabaseSchemaVersion() { try { List<Map<String, Object>> result = getJdbcTemplate() .queryForList("SELECT \"value\" FROM repository_metadata WHERE \"name\"='schema-version'"); String schema = result.size() > 0 ? (String) result.get(0).get("value") : ""; return schema; } catch (DataAccessException ex) { String error = "Wrong database schema (too old to get the exact number), " + "please initialize the database."; log.error(error); } return ""; } public boolean checkDatabaseSchemaVersion() throws AnnisException { String dbSchemaVersion = getDatabaseSchemaVersion(); if (getSchemaVersion() != null && !getSchemaVersion().equalsIgnoreCase(dbSchemaVersion)) { String error = "Wrong database schema \"" + dbSchemaVersion + "\", please initialize the database."; log.error(error); throw new AnnisException(error); } return true; } public void initializeDatabase(String host, String port, String database, String user, String password, String defaultDatabase, String superUser, String superPassword, boolean useSSL, String pgSchema) { // connect as super user to the default database to create new user and database if (superPassword != null) { log.info("Creating Annis database and user."); getDataSource().setInnerDataSource( createDataSource(host, port, defaultDatabase, superUser, superPassword, useSSL, pgSchema)); createDatabaseAndUser(database, user, password); } // switch to new database as new user for the rest of the initialization procedure getDataSource() .setInnerDataSource(createDataSource(host, port, database, user, password, useSSL, pgSchema)); // if (pgSchema != null && !"public".equalsIgnoreCase(pgSchema)) { pgSchema = pgSchema.toLowerCase().replaceAll("[^a-z0-9]", "_"); log.info("creating PostgreSQL schema {}", pgSchema); // we have to create a schema before we can use it try { getJdbcTemplate().execute("CREATE SCHEMA " + pgSchema + ";"); } catch (DataAccessException ex) { // ignore if the schema already exists log.info("schema " + pgSchema + " already exists"); } } setupDatabase(); } private void createDatabaseAndUser(String database, String user, String password) { dropDatabase(database); dropUser(user); createUser(user, password); createDatabase(database, user); installPlPgSql(); } private void setupDatabase() { createFunctionUniqueToplevelCorpusName(); createSchema(); createSchemaIndexes(); populateSchema(); } private BasicDataSource createDataSource(File dbProperties) throws IOException, URISyntaxException { BasicDataSource result; Properties props = new Properties(); try (InputStream is = new FileInputStream(dbProperties)) { props.load(is); String rawJdbcURL = props.getProperty("datasource.url").trim(); rawJdbcURL = StringUtils.removeStart(rawJdbcURL, "jdbc:"); URI jdbcURL = new URI(rawJdbcURL); result = createDataSource(jdbcURL.getHost(), "" + jdbcURL.getPort(), jdbcURL.getPath().substring(1), // remove the "/" at the beginning props.getProperty("datasource.username"), props.getProperty("datasource.password"), "true".equalsIgnoreCase(props.getProperty("datasource.ssl")), props.getProperty("datasource.schema")); } return result; } private BasicDataSource createDataSource(String host, String port, String database, String user, String password, boolean useSSL, String schema) { String url = "jdbc:postgresql://" + host + ":" + port + "/" + database; // DriverManagerDataSource is deprecated // return new DriverManagerDataSource("org.postgresql.Driver", url, user, password); BasicDataSource result = new BasicDataSource(); result.setUrl(url); if (useSSL) { result.setConnectionProperties("ssl=true"); } result.setUsername(user); result.setPassword(password); result.setValidationQuery("SELECT 1;"); result.setAccessToUnderlyingConnectionAllowed(true); if (schema == null) { schema = "public"; } result.setConnectionInitSqls(Arrays.asList("SET search_path TO \"$user\"," + schema)); result.setDriverClassName("org.postgresql.Driver"); return result; } /** * Reads ANNIS files from several directories. * * @param path Specifies the path to the corpora, which should be imported. * @param aliasName An alias name for this corpus. Can be null. * @param overwrite If set to true conflicting top level corpora are deleted. * @param waitForOtherTasks If true wait for other tasks to finish, if false * abort. * * @return true if successful */ @Transactional(readOnly = false, propagation = Propagation.REQUIRES_NEW, isolation = Isolation.READ_COMMITTED) public boolean importCorpus(String path, String aliasName, boolean overwrite, boolean waitForOtherTasks) { // check schema version first checkDatabaseSchemaVersion(); if (!lockRepositoryMetadataTable(waitForOtherTasks)) { log.error("Another import is currently running"); return false; } // explicitly unset any timeout getJdbcTemplate().update("SET statement_timeout TO 0"); ANNISFormatVersion annisFormatVersion = getANNISFormatVersion(path); if (annisFormatVersion == ANNISFormatVersion.V3_3) { return importVersion4(path, aliasName, overwrite, annisFormatVersion); } else if (annisFormatVersion == ANNISFormatVersion.V3_1 || annisFormatVersion == ANNISFormatVersion.V3_2) { return importVersion3(path, aliasName, overwrite, annisFormatVersion); } log.error("Unknown ANNIS import format version"); return false; } private boolean importVersion4(String path, String aliasName, boolean overwrite, ANNISFormatVersion version) { this.annisFileSuffix = ".annis"; createStagingAreaV33(temporaryStagingArea); bulkImport(path, version); String toplevelCorpusName = getTopLevelCorpusFromTmpArea(); // remove conflicting top level corpora, when override is set to true. if (overwrite) { deleteCorpusDao.checkAndRemoveTopLevelCorpus(toplevelCorpusName); } else { checkTopLevelCorpus(); } applyConstraints(); createStagingAreaIndexes(version); fixResolverVisMapTable(toplevelCorpusName, tableInStagingArea(FILE_RESOLVER_VIS_MAP)); analyzeStagingTables(); addDocumentNameMetaData(); Offsets offsets = calculateOffsets(); long corpusID = getNewToplevelCorpusID(offsets); createNodeIdMapping(); importBinaryData(path, toplevelCorpusName); extendStagingText(corpusID); extendStagingExampleQueries(corpusID); analyzeAutoGeneratedQueries(corpusID); computeCorpusStatistics(path); analyzeStagingTables(); insertCorpus(corpusID, offsets); computeCorpusPath(corpusID); createAnnotations(corpusID); createAnnoCategory(corpusID); // create the new facts table partition createFacts(corpusID, version, offsets); if (temporaryStagingArea) { dropStagingArea(); } // create empty corpus properties file if (getAnnisDao().getCorpusConfigurationSave(toplevelCorpusName) == null) { log.info("creating new corpus.properties file"); getAnnisDao().setCorpusConfiguration(toplevelCorpusName, new Properties()); } analyzeFacts(corpusID); analyzeTextTable(toplevelCorpusName); generateExampleQueries(corpusID); if (aliasName != null && !aliasName.isEmpty()) { addCorpusAlias(corpusID, aliasName); } return true; } private boolean importVersion3(String path, String aliasName, boolean overwrite, ANNISFormatVersion version) { this.annisFileSuffix = ".tab"; createStagingAreaV32(temporaryStagingArea); bulkImport(path, version); String toplevelCorpusName = getTopLevelCorpusFromTmpArea(); // remove conflicting top level corpora, when override is set to true. if (overwrite) { deleteCorpusDao.checkAndRemoveTopLevelCorpus(toplevelCorpusName); } else { checkTopLevelCorpus(); } createStagingAreaIndexes(version); fixResolverVisMapTable(toplevelCorpusName, tableInStagingArea(FILE_RESOLVER_VIS_MAP)); computeTopLevelCorpus(); analyzeStagingTables(); computeLeftTokenRightToken(); removeUnecessarySpanningRelations(); addUniqueNodeNameAppendix(); adjustRankPrePost(); adjustTextId(); addDocumentNameMetaData(); Offsets offsets = calculateOffsets(); long corpusID = getNewToplevelCorpusID(offsets); createNodeIdMapping(); importBinaryData(path, toplevelCorpusName); extendStagingText(corpusID); extendStagingExampleQueries(corpusID); analyzeAutoGeneratedQueries(corpusID); computeRealRoot(); computeLevel(); computeCorpusStatistics(path); computeSpanFromSegmentation(); applyConstraints(); analyzeStagingTables(); insertCorpus(corpusID, offsets); computeCorpusPath(corpusID); createAnnotations(corpusID); createAnnoCategory(corpusID); // create the new facts table partition createFacts(corpusID, version, offsets); if (temporaryStagingArea) { dropStagingArea(); } // create empty corpus properties file if (getAnnisDao().getCorpusConfigurationSave(toplevelCorpusName) == null) { log.info("creating new corpus.properties file"); getAnnisDao().setCorpusConfiguration(toplevelCorpusName, new Properties()); } analyzeFacts(corpusID); analyzeTextTable(toplevelCorpusName); generateExampleQueries(corpusID); if (aliasName != null && !aliasName.isEmpty()) { addCorpusAlias(corpusID, aliasName); } return true; } ///// Subtasks of importing a corpus protected void dropIndexes() { log.info("dropping indexes"); for (String index : listIndexesOnTables(allTables())) { log.debug("dropping index: " + index); getJdbcTemplate().execute("DROP INDEX " + index); } } void createStagingAreaV33(boolean useTemporary) { log.info("creating staging area for import format version 3.3"); MapSqlParameterSource args = makeArgs().addValue(":tmp", useTemporary ? "TEMPORARY" : "UNLOGGED"); executeSqlFromScript("staging_area.sql", args); } void createStagingAreaV32(boolean useTemporary) { log.info("creating staging area for import format version 3.1/3.2"); MapSqlParameterSource args = makeArgs().addValue(":tmp", useTemporary ? "TEMPORARY" : "UNLOGGED"); executeSqlFromScript("staging_area_v32.sql", args); } /** * Reads tab seperated files from the filesystem, but it takes only files into * account with the {@link DefaultAdministrationDao#REL_ANNIS_FILE_SUFFIX} * suffix. Further it is straight forward except for the * {@link DefaultAdministrationDao#FILE_RESOLVER_VIS_MAP} and the * {@link DefaultAdministrationDao#EXAMPLE_QUERIES_TAB}. This is done by this * method automatically. * * <ul> * * <li>{@link DefaultAdministrationDao#FILE_RESOLVER_VIS_MAP}: For backwards * compatibility, the columns must be counted, since there exists one * additional column for visibility behaviour of visualizers.</li> * * <li>{@link DefaultAdministrationDao#EXAMPLE_QUERIES_TAB}: Takes into * account the state of {@link #generateExampleQueries}.</li> * * </ul> * * @param path The path to the ANNIS files. The files have to have this suffix * @param version {@link DefaultAdministrationDao#REL_ANNIS_FILE_SUFFIX} */ void bulkImport(String path, ANNISFormatVersion version) { log.info("bulk-loading data"); for (String table : importedTables) { if (table.equalsIgnoreCase(FILE_RESOLVER_VIS_MAP)) { importResolverVisMapTable(path, table); } // check if example query exists. If not copy it from the resource folder. else if (table.equalsIgnoreCase(EXAMPLE_QUERIES_TAB)) { File f = new File(path, table + annisFileSuffix); if (f.exists()) { log.info(table + annisFileSuffix + " file exists"); bulkloadTableFromResource(tableInStagingArea(table), new FileSystemResource(f)); if (generateExampleQueries == (EXAMPLE_QUERIES_CONFIG.IF_MISSING)) { generateExampleQueries = EXAMPLE_QUERIES_CONFIG.FALSE; } } else { if (generateExampleQueries == EXAMPLE_QUERIES_CONFIG.IF_MISSING) { generateExampleQueries = EXAMPLE_QUERIES_CONFIG.TRUE; } log.info(table + annisFileSuffix + " file not found"); } } else if (table.equalsIgnoreCase("node")) { bulkImportNode(path, version); } else { bulkloadTableFromResource(tableInStagingArea(table), new FileSystemResource(new File(path, table + annisFileSuffix))); } } } private void bulkImportNode(String path, ANNISFormatVersion version) { // check column number by reading first line File nodeTabFile = new File(path, "node" + annisFileSuffix); try (BufferedReader reader = new BufferedReader( new InputStreamReader(new FileInputStream(nodeTabFile), "UTF-8"));) { String firstLine = reader.readLine(); int columnNumber = firstLine == null ? 13 : StringUtils.splitPreserveAllTokens(firstLine, '\t').length; if (version == ANNISFormatVersion.V3_3 || version == ANNISFormatVersion.V3_2) { // new node table with segmentations // no special handling needed bulkloadTableFromResource(tableInStagingArea("node"), new FileSystemResource(nodeTabFile)); } else if (version == ANNISFormatVersion.V3_1) { getJdbcTemplate().execute("DROP TABLE IF EXISTS _tmpnode;"); // old node table without segmentations // create temporary table for bulk import getJdbcTemplate().execute("CREATE TEMPORARY TABLE _tmpnode" + "\n(\n" + "id bigint,\n" + "text_ref integer,\n" + "corpus_ref integer,\n" + "namespace varchar,\n" + "name varchar,\n" + "\"left\" integer,\n" + "\"right\" integer,\n" + "token_index integer,\n" + "continuous boolean,\n" + "span varchar\n" + ");"); bulkloadTableFromResource("_tmpnode", new FileSystemResource(nodeTabFile)); log.info("copying nodes from temporary helper table into staging area"); getJdbcTemplate().execute("INSERT INTO " + tableInStagingArea("node") + "\n" + " SELECT id, text_ref, corpus_ref, namespace AS layer, name, \"left\", " + "\"right\", token_index, " + "NULL AS seg_name, NULL AS seg_left, NULL AS seg_left, continuous, " + "span\n" + "FROM _tmpnode"); } else { throw new RuntimeException("Illegal number of columns in node" + annisFileSuffix + ", " + "should be 13 or 10 but was " + columnNumber); } } catch (IOException ex) { log.error(null, ex); } } void createStagingAreaIndexes(ANNISFormatVersion version) { log.info("creating indexes for staging area"); if (version == ANNISFormatVersion.V3_3) { executeSqlFromScript("indexes_staging_v33.sql"); } else { executeSqlFromScript("indexes_staging_v32.sql"); } } void computeTopLevelCorpus() { log.info("computing top-level corpus"); executeSqlFromScript("toplevel_corpus.sql"); } void importBinaryData(String path, String toplevelCorpusName) { log.info("importing all binary data from ExtData"); File extData = new File(path + "/ExtData"); if (extData.canRead() && extData.isDirectory()) { // import toplevel corpus media files File[] topFiles = extData.listFiles((FileFilter) FileFileFilter.FILE); for (File data : topFiles) { String extension = FilenameUtils.getExtension(data.getName()); try { if (mimeTypeMapping.containsKey(extension)) { log.info("import " + data.getCanonicalPath() + " to staging area"); // search for corpus_ref String sqlScript = "SELECT id FROM _corpus WHERE top_level IS TRUE LIMIT 1"; long corpusID = getJdbcTemplate().queryForLong(sqlScript); importSingleFile(data.getCanonicalPath(), toplevelCorpusName, corpusID); } else { log.warn("not importing " + data.getCanonicalPath() + " since file type is unknown"); } } catch (IOException ex) { log.error("no canonical path given", ex); } } // get each subdirectory (which corresponds to an document name) File[] documents = extData.listFiles((FileFilter) DirectoryFileFilter.DIRECTORY); for (File doc : documents) { if (doc.isDirectory() && doc.canRead()) { File[] dataFiles = doc.listFiles((FileFilter) FileFileFilter.FILE); for (File data : dataFiles) { String extension = FilenameUtils.getExtension(data.getName()); try { if (mimeTypeMapping.containsKey(extension)) { log.info("import " + data.getCanonicalPath() + " to staging area"); // search for corpus_ref String sqlScript = "SELECT id FROM _corpus WHERE \"name\" = ? LIMIT 1"; long corpusID = getJdbcTemplate().queryForLong(sqlScript, doc.getName()); importSingleFile(data.getCanonicalPath(), toplevelCorpusName, corpusID); } else { log.warn( "not importing " + data.getCanonicalPath() + " since file type is unknown"); } } catch (IOException ex) { log.error("no canonical path given", ex); } } } } } } /** * Imports a single binary file. * * @param file Specifies the file to be imported. * @param corpusRef Assigns the file this corpus. * @param toplevelCorpusName The toplevel corpus name */ private void importSingleFile(String file, String toplevelCorpusName, long corpusRef) { BinaryImportHelper preStat = new BinaryImportHelper(file, getRealDataDir(), toplevelCorpusName, corpusRef, mimeTypeMapping); getJdbcTemplate().execute(BinaryImportHelper.SQL, preStat); } /** * Updates the example queries table in the staging area. The final toplevel * corpus must already be computed. * * @param toplevelID The final top level corpus id. * */ void extendStagingExampleQueries(long toplevelID) { log.info("extending _example_queries"); executeSqlFromScript("extend_staging_example_queries.sql", makeArgs().addValue(":id", toplevelID)); } void extendStagingText(long toplevelID) { log.info("extending _text"); executeSqlFromScript("extend_staging_text.sql", makeArgs().addValue(":id", toplevelID)); } void computeLeftTokenRightToken() { log.info("computing values for struct.left_token and struct.right_token"); executeSqlFromScript("left_token_right_token.sql"); // re-analyze node since we added new columns log.info("analyzing node"); getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node")); } void computeRealRoot() { log.info("computing real root for rank"); executeSqlFromScript("root.sql"); } void computeLevel() { log.info("computing values for rank.level (dominance and pointing relations)"); executeSqlFromScript("level.sql"); log.info("computing values for rank.level (coverage)"); executeSqlFromScript("level_coverage.sql"); } void computeCorpusStatistics(String path) { File f = new File(path); String absolutePath = path; try { absolutePath = f.getCanonicalPath(); } catch (IOException ex) { log.error("Something went really wrong when calculating the canonical path", ex); } log.info("computing statistics for top-level corpus"); MapSqlParameterSource args = makeArgs().addValue(":path", absolutePath); executeSqlFromScript("corpus_stats.sql", args); } void computeCorpusPath(long corpusID) { MapSqlParameterSource args = makeArgs().addValue(":id", corpusID); log.info("computing path information of the corpus tree for corpus with ID " + corpusID); executeSqlFromScript("compute_corpus_path.sql", args); } void addDocumentNameMetaData() { log.info("add the document name as metadata"); executeSqlFromScript("adddocmetadata.sql"); } protected void adjustRankPrePost() { log.info("updating pre and post order in _rank"); executeSqlFromScript("adjustrankprepost.sql"); log.info("analyzing _rank"); getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("rank")); } protected void adjustTextId() { log.info("updating id in _text and text_ref in _node"); executeSqlFromScript("adjusttextid.sql"); log.info("analyzing _node and _text"); getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("text")); getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node")); } protected void addUniqueNodeNameAppendix() { // first check if this is actually necessary log.info("check if node names are unique"); getJdbcTemplate().execute("ALTER TABLE _node ADD COLUMN unique_name_appendix varchar;"); List<Integer> checkDuplicate = getJdbcTemplate().queryForList( "SELECT COUNT(*) from _node GROUP BY \"name\", corpus_ref HAVING COUNT(*) > 1 LIMIT 1", Integer.class); if (checkDuplicate.isEmpty()) { log.info("node names are unique, no update necessary"); } else { log.info("add an unique node name appendix"); executeSqlFromScript("unique_node_name_appendix.sql"); } } /** * * @return the new corpus ID */ void createNodeIdMapping() { log.info("creating node ID mapping (for properly sorted IDs)"); executeSqlFromScript("node_id_mapping.sql"); } /* private long getRecentCorpusID() { int numOfEntries = getJdbcTemplate().queryForObject( "SELECT COUNT(*) from corpus_stats", Integer.class); long recentCorpusId = 0; if (numOfEntries > 0) { recentCorpusId = getJdbcTemplate().queryForObject( "SELECT max(id) FROM corpus_stats", Long.class); log.debug("the id from recently imported corpus: {}", recentCorpusId); } return recentCorpusId; } */ long getNewToplevelCorpusID(Offsets offsets) { log.info("query for the new corpus ID"); long maxLocalID = getJdbcTemplate().queryForObject("SELECT MAX(id) FROM _corpus WHERE top_level IS TRUE", Long.class); long result = maxLocalID + offsets.getCorpusID(); log.info("new corpus ID is " + result); return result; } void computeSpanFromSegmentation() { log.info("computing span value for segmentation nodes"); executeSqlFromScript("span_from_segmentation.sql"); } void applyConstraints() { log.info("activating relational constraints"); executeSqlFromScript("constraints.sql"); } private Offsets calculateOffsets() { log.info("querying ID offsets"); long offsetCorpusID = getJdbcTemplate() .queryForObject("SELECT COALESCE((SELECT max(max_corpus_id)+1 FROM corpus_stats),0)", Long.class); long offsetCorpusPost = getJdbcTemplate() .queryForObject("SELECT COALESCE((SELECT max(max_corpus_post)+1 FROM corpus_stats),0)", Long.class); long offsetNodeID = getJdbcTemplate() .queryForObject("SELECT COALESCE((SELECT max(max_node_id)+1 FROM corpus_stats),0)", Long.class); return new Offsets(offsetCorpusID, offsetCorpusPost, offsetNodeID); } void insertCorpus(long corpusID, Offsets offsets) { MapSqlParameterSource args = offsets.makeArgs().addValue(":id", corpusID); log.info("moving corpus from staging area to main db"); for (String table : tablesToCopyManually) { log.debug("moving table \"{}\" from staging area to main db", table); StringBuilder sql = new StringBuilder(); String predefinedFrom = tableInsertFrom == null ? null : tableInsertFrom.get(table); String predefinedSelect = tableInsertSelect == null ? null : tableInsertSelect.get(table); if (predefinedFrom != null || predefinedSelect != null) { if (predefinedFrom == null) { predefinedFrom = predefinedSelect; } sql.append("INSERT INTO "); sql.append(table); sql.append(" ( "); sql.append(predefinedSelect); sql.append(" ) (SELECT "); sql.append(predefinedFrom); sql.append(" FROM "); sql.append(tableInStagingArea(table)).append(")"); } else { sql.append("INSERT INTO "); sql.append(table); sql.append(" (SELECT * FROM "); sql.append(tableInStagingArea(table)).append(")"); } executeSql(sql.toString(), args); } } void dropStagingArea() { log.info("dropping staging area"); // tables must be dropped in reverse order List<String> tables = importedAndCreatedTables(); Collections.reverse(tables); for (String table : tables) { getJdbcTemplate().execute("DROP TABLE " + tableInStagingArea(table)); } } void dropMaterializedTables() { log.info("dropping materialized tables"); getJdbcTemplate().execute("DROP TABLE facts"); } void analyzeStagingTables() { log.info("analyzing staging area"); for (String t : importedTables) { log.debug("analyzing " + t); getJdbcTemplate().execute("ANALYZE " + tableInStagingArea(t)); } } void createAnnotations(long corpusID) { MapSqlParameterSource args = makeArgs().addValue(":id", corpusID); log.info("creating annotations table for corpus with ID " + corpusID); executeSqlFromScript("annotations.sql", args); log.info("indexing annotations table for corpus with ID " + corpusID); executeSqlFromScript("indexes_annotations.sql", args); } void createAnnoCategory(long corpusID) { MapSqlParameterSource args = makeArgs().addValue(":id", corpusID); log.info("creating annotation category table for corpus with ID " + corpusID); executeSqlFromScript("annotation_category.sql", args); } void analyzeFacts(long corpusID) { log.info("analyzing facts table for corpus with ID " + corpusID); getJdbcTemplate().execute("ANALYZE facts_" + corpusID); } /** * Update the statistics for the "facts" table as a whole. */ @Transactional(propagation = Propagation.REQUIRED) public void analyzeParentFacts() { log.info("analyzing parent facts table"); // explicitly unset any timeout. Since this function might be called independent // from the import process we have to set it manually. getJdbcTemplate().update("SET statement_timeout TO 0"); getJdbcTemplate().execute("ANALYZE facts"); } void createFacts(long corpusID, ANNISFormatVersion version, Offsets offsets) { MapSqlParameterSource args = offsets.makeArgs().addValue(":id", corpusID); log.info("creating materialized facts table for corpus with ID " + corpusID); if (version == ANNISFormatVersion.V3_3) { executeSqlFromScript("facts.sql", args); } else { executeSqlFromScript("facts_v32.sql", args); } log.info("indexing the new facts table (general indexes)"); executeSqlFromScript("indexes.sql", args); log.info("indexing the new facts table (edge related indexes)"); executeSqlFromScript("indexes_edge.sql", args); } void removeUnecessarySpanningRelations() { log.info("setting \"continuous\" to a correct value"); executeSqlFromScript("set_continuous.sql"); // re-analyze node since we added new columns log.info("analyzing node"); getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node")); log.info("removing unnecessary span relations"); executeSqlFromScript("remove_span_relations.sql"); } ///// Other sub tasks public List<Long> listToplevelCorpora() { String sql = "SELECT id FROM corpus WHERE top_level = 'y'"; return getJdbcTemplate().query(sql, ParameterizedSingleColumnRowMapper.newInstance(Long.class)); } /** * Delete files not used by this instance in the data directory. */ @Transactional(readOnly = true) public void cleanupData() { List<String> allFilesInDatabaseList = getJdbcTemplate() .queryForList("SELECT filename FROM media_files AS m", String.class); File dataDir = getRealDataDir(); Set<File> allFilesInDatabase = new HashSet<>(); for (String singleFileName : allFilesInDatabaseList) { allFilesInDatabase.add(new File(dataDir, singleFileName)); } log.info("Cleaning up the data directory"); // go through each file of the folder and check if it is not included File[] childFiles = dataDir.listFiles(); if (childFiles != null) { for (File f : childFiles) { if (f.isFile() && !allFilesInDatabase.contains(f)) { if (!f.delete()) { log.warn("Could not delete {}", f.getAbsolutePath()); } } } } } public List<Map<String, Object>> listCorpusStats() { return getJdbcTemplate().queryForList("SELECT * FROM corpus_info ORDER BY name"); } /** * Lists the corpora using the connection information of a given * "database.properties". file * * @param databaseProperties * @return */ public List<Map<String, Object>> listCorpusStats(File databaseProperties) { List<Map<String, Object>> result = new LinkedList<>(); DataSource origDataSource = getDataSource().getInnerDataSource(); try { if (databaseProperties != null) { getDataSource().setInnerDataSource(createDataSource(databaseProperties)); } result = getJdbcTemplate().queryForList("SELECT * FROM corpus_info ORDER BY name"); } catch (IOException | URISyntaxException | DataAccessException ex) { if (databaseProperties == null) { log.error("Could not query corpus list", ex); } else { log.error("Could not query corpus list for the file " + databaseProperties.getAbsolutePath(), ex); } } finally { getDataSource().setInnerDataSource(origDataSource); } return result; } public List<String> listUsedIndexes() { log.info("retrieving list of used indexes"); return listIndexDefinitions(true); } public List<String> listUnusedIndexes() { log.info("retrieving list of unused indexes"); return listIndexDefinitions(false); } /** * Provides a list where the keys are the aliases and the values are the * corpus names. * * @param databaseProperties * @return */ public Multimap<String, String> listCorpusAlias(File databaseProperties) { Multimap<String, String> result = TreeMultimap.create(); DataSource origDataSource = getDataSource().getInnerDataSource(); try { if (databaseProperties != null) { getDataSource().setInnerDataSource(createDataSource(databaseProperties)); } result = getJdbcTemplate().query( "SELECT a.alias AS alias, c.name AS corpus\n" + "FROM corpus_alias AS a, corpus AS c\n" + "WHERE\n" + " a.corpus_ref = c.id", new ResultSetExtractor<Multimap<String, String>>() { @Override public Multimap<String, String> extractData(ResultSet rs) throws SQLException, DataAccessException { Multimap<String, String> data = TreeMultimap.create(); while (rs.next()) { // alias -> corpus name data.put(rs.getString(1), rs.getString(2)); } return data; } }); } catch (IOException | URISyntaxException | DataAccessException ex) { if (databaseProperties == null) { log.error("Could not query corpus list", ex); } else { log.error("Could not query corpus list for the file " + databaseProperties.getAbsolutePath(), ex); } } finally { getDataSource().setInnerDataSource(origDataSource); } return result; } @Transactional(readOnly = true) public UserConfig retrieveUserConfig(final String userName) { String sql = "SELECT * FROM user_config WHERE id=?"; UserConfig config = getJdbcTemplate().query(sql, new Object[] { userName }, new ResultSetExtractor<UserConfig>() { @Override public UserConfig extractData(ResultSet rs) throws SQLException, DataAccessException { // default to empty config UserConfig c = new UserConfig(); if (rs.next()) { try { c = jsonMapper.readValue(rs.getString("config"), UserConfig.class); } catch (IOException ex) { log.error("Could not parse JSON that is stored in database (user configuration)", ex); } } return c; } }); return config; } @Transactional(readOnly = false) public void storeUserConfig(String userName, UserConfig config) { String sqlUpdate = "UPDATE user_config SET config=?::json WHERE id=?"; String sqlInsert = "INSERT INTO user_config(id, config) VALUES(?,?)"; try { String jsonVal = jsonMapper.writeValueAsString(config); // if no row was affected there is no entry yet and we should create one if (getJdbcTemplate().update(sqlUpdate, jsonVal, userName) == 0) { getJdbcTemplate().update(sqlInsert, userName, jsonVal); } } catch (IOException ex) { log.error("Cannot serialize user config JSON for database.", ex); } } public void addCorpusAlias(long corpusID, String alias) { getJdbcTemplate().update( "INSERT INTO corpus_alias (alias, corpus_ref)\n" + "VALUES(\n" + " ?, \n" + " ?\n" + ");", alias, corpusID); } public void addCorpusAlias(String corpusName, String alias) { getJdbcTemplate().update("INSERT INTO corpus_alias (alias, corpus_ref)\n" + "SELECT ? AS alias, c.id\n" + "FROM corpus AS c WHERE c.top_level AND c.name=? LIMIT 1;", alias, corpusName); } ///// Helpers private List<String> importedAndCreatedTables() { List<String> tables = new ArrayList<>(); tables.addAll(Arrays.asList(importedTables)); tables.addAll(Arrays.asList(createdTables)); return tables; } private List<String> allTables() { List<String> tables = new ArrayList<>(); tables.addAll(Arrays.asList(importedTables)); tables.addAll(Arrays.asList(createdTables)); //tables.addAll(Arrays.asList(materializedTables)); return tables; } private ParameterizedSingleColumnRowMapper<String> stringRowMapper() { return ParameterizedSingleColumnRowMapper.newInstance(String.class); } // executes an SQL script from $ANNIS_HOME/scripts @Transactional(propagation = Propagation.MANDATORY) public PreparedStatement executeSqlFromScript(String script) { return executeSqlFromScript(script, null); } // bulk-loads a table from a resource private void bulkloadTableFromResource(String table, Resource resource) { log.debug("bulk-loading data from '" + resource.getFilename() + "' into table '" + table + "'"); String sql = "COPY " + table + " FROM STDIN WITH DELIMITER E'\t' NULL AS 'NULL'"; try { // retrieve the currently open connection if running inside a transaction Connection originalCon = DataSourceUtils.getConnection(getDataSource()); Connection con = originalCon; if (con instanceof DelegatingConnection) { DelegatingConnection<?> delCon = (DelegatingConnection<?>) con; con = delCon.getInnermostDelegate(); } Preconditions.checkState(con instanceof PGConnection, "bulk-loading only works with a PostgreSQL JDBC connection"); // Postgres JDBC4 8.4 driver now supports the copy API PGConnection pgCon = (PGConnection) con; pgCon.getCopyAPI().copyIn(sql, resource.getInputStream()); DataSourceUtils.releaseConnection(originalCon, getDataSource()); } catch (SQLException e) { throw new DatabaseAccessException(e); } catch (IOException e) { throw new FileAccessException(e); } } // get a list of indexes on the imported Snd created tables tables which are not // auto-created by postgres (namely, primary key and unique constraints) // exploits the fact that the index has the same name as the constraint private List<String> listIndexesOnTables(List<String> tables) { String sql = "" + "SELECT indexname " + "FROM pg_indexes " + "WHERE tablename IN (" + StringUtils.repeat("?", ",", tables.size()) + ") " + "AND lower(indexname) NOT IN " + " (SELECT lower(conname) FROM pg_constraint WHERE contype in ('p', 'u'))"; return getJdbcTemplate().query(sql, tables.toArray(), stringRowMapper()); } private List<String> listIndexDefinitions(boolean used) { return listIndexDefinitions(used, allTables()); } /* * Returns the CREATE INDEX statement for all indexes on the Annis tables, * that are not auto-created by PostgreSQL (primary keys and unique * constraints). * * @param used If True, return used indexes. If False, return unused indexes * (scan count is 0). */ public List<String> listIndexDefinitions(boolean used, List<String> tables) { String scansOp = used ? "!=" : "="; String sql = "SELECT pg_get_indexdef(x.indexrelid) AS indexdef " + "FROM pg_index x, pg_class c " + "WHERE x.indexrelid = c.oid " + "AND c.relname IN ( " + StringUtils.repeat("?", ",", tables.size()) + ") " + "AND pg_stat_get_numscans(x.indexrelid) " + scansOp + " 0"; return getJdbcTemplate().query(sql, tables.toArray(), stringRowMapper()); } public List<String> listIndexDefinitions(String... tables) { String sql = "" + "SELECT pg_get_indexdef(x.indexrelid) AS indexdef " + "FROM pg_index x, pg_class c, pg_indexes i " + "WHERE x.indexrelid = c.oid " + "AND c.relname = i.indexname " + "AND i.tablename IN ( " + StringUtils.repeat("?", ",", tables.length) + " )"; return getJdbcTemplate().query(sql, tables, new ParameterizedSingleColumnRowMapper<String>()); } public List<String> listUsedIndexes(String... tables) { String sql = "" + "SELECT pg_get_indexdef(x.indexrelid) AS indexdef " + "FROM pg_index x, pg_class c, pg_indexes i " + "WHERE x.indexrelid = c.oid " + "AND c.relname = i.indexname " + "AND i.tablename IN ( " + StringUtils.repeat("?", ",", tables.length) + " ) " + "AND pg_stat_get_numscans(x.indexrelid) != 0"; return getJdbcTemplate().query(sql, tables, new ParameterizedSingleColumnRowMapper<String>()); } public boolean resetStatistics() { try { getJdbcTemplate().queryForList("SELECT pg_stat_reset()"); return true; } catch (DataAccessException e) { return false; } } /** * Retrieves the name of the top level corpus in the corpus.tab file. * * <p> * At this point, the tab files must be in the staging area.</p> * * @return The name of the toplevel corpus or an empty String if no top level * corpus is found. */ private String getTopLevelCorpusFromTmpArea() { String sql = "SELECT name FROM " + tableInStagingArea("corpus") + " WHERE type='CORPUS'\n" + "AND pre = (SELECT min(pre) FROM " + tableInStagingArea("corpus") + ")\n" + "AND post = (SELECT max(post) FROM " + tableInStagingArea("corpus") + ")"; return getJdbcTemplate().query(sql, new ResultSetExtractor<String>() { @Override public String extractData(ResultSet rs) throws SQLException, DataAccessException { if (rs.next()) { return rs.getString("name"); } else { return null; } } }); } ///// Getter / Setter public boolean isTemporaryStagingArea() { return temporaryStagingArea; } public void setTemporaryStagingArea(boolean temporaryStagingArea) { this.temporaryStagingArea = temporaryStagingArea; } /** * Get the name and version of the schema this @{link AdministrationDao} is * configured to work with. * * @return */ public String getSchemaVersion() { return schemaVersion; } public void setSchemaVersion(String schemaVersion) { this.schemaVersion = schemaVersion; } public Map<String, String> getMimeTypeMapping() { return mimeTypeMapping; } public void setMimeTypeMapping(Map<String, String> mimeTypeMapping) { this.mimeTypeMapping = mimeTypeMapping; } public Map<String, String> getTableInsertSelect() { return tableInsertSelect; } public void setTableInsertSelect(Map<String, String> tableInsertSelect) { this.tableInsertSelect = tableInsertSelect; } public Map<String, String> getTableInsertFrom() { return tableInsertFrom; } public void setTableInsertFrom(Map<String, String> tableInsertFrom) { this.tableInsertFrom = tableInsertFrom; } private void readOldResolverVisMapFormat(File resolver_vis_tab) { StringBuilder sb = new StringBuilder(); sb.append("CREATE TABLE tmp_resolver_vis_map "); sb.append("( "); sb.append("\"corpus\" varchar, "); sb.append("\"version\" varchar, "); sb.append("\"namespace\" varchar, "); sb.append("\"element\" varchar, "); sb.append("\"vis_type\" varchar NOT NULL, "); sb.append("\"display_name\" varchar NOT NULL, "); sb.append("\"order\" integer default '0', "); sb.append("\"mappings\" varchar"); sb.append(");"); getJdbcTemplate().execute(sb.toString()); bulkloadTableFromResource("tmp_resolver_vis_map", new FileSystemResource(resolver_vis_tab)); sb = new StringBuilder(); sb.append("INSERT INTO "); sb.append(tableInStagingArea(FILE_RESOLVER_VIS_MAP)); sb.append("\n\t"); sb.append(" ("); sb.append("corpus, "); sb.append("version, "); sb.append("namespace, "); sb.append("element, "); sb.append("vis_type, "); sb.append("display_name, "); sb.append("\"order\", "); sb.append("mappings"); sb.append(")"); sb.append("\n"); sb.append("SELECT tmp.corpus, "); sb.append("tmp.version, "); sb.append("tmp.namespace, "); sb.append("tmp.element, "); sb.append("tmp.vis_type, "); sb.append("tmp.display_name, "); sb.append("tmp.\"order\", "); sb.append("tmp.mappings"); sb.append("\n\t"); sb.append("FROM tmp_resolver_vis_map AS tmp; "); getJdbcTemplate().execute(sb.toString()); getJdbcTemplate().execute("DROP TABLE tmp_resolver_vis_map;"); } /** * Imported the old and the new version of the resolver_vis_map.tab. The new * version has an additional column for visibility status of the * visualization. * * @param path The path to the ANNIS file. * @param table The final table in the database of the resolver_vis_map table. */ private void importResolverVisMapTable(String path, String table) { try { // count cols for detecting old resolver_vis_map table format File resolver_vis_tab = new File(path, table + annisFileSuffix); if (!resolver_vis_tab.isFile()) { return; } String firstLine; try (BufferedReader bReader = new BufferedReader( new InputStreamReader(new FileInputStream(resolver_vis_tab), "UTF-8"))) { firstLine = bReader.readLine(); } int cols = 9; // default number if (firstLine != null) { String[] entries = firstLine.split("\t"); cols = entries.length; log.debug("the first row: {} amount of cols: {}", entries, cols); } switch (cols) { // old format case 8: readOldResolverVisMapFormat(resolver_vis_tab); break; // new format case 9: bulkloadTableFromResource(tableInStagingArea(table), new FileSystemResource(new File(path, table + annisFileSuffix))); break; default: log.error("invalid amount of cols"); throw new RuntimeException(); } } catch (IOException | FileAccessException e) { log.error("could not read {}", table, e); } } /** * Removes any unwanted entries from the resolver_vis_map table * * @param toplevelCorpus * @param table */ private void fixResolverVisMapTable(String toplevelCorpus, String table) { log.info("checking resolver_vis_map for errors"); // delete all entries that reference a different corpus than the imported one int invalidRows = getJdbcTemplate().update("DELETE FROM " + table + " WHERE corpus <> ?", toplevelCorpus); if (invalidRows > 0) { log.warn( "there were " + invalidRows + " rows in the resolver_vis_map that referenced the wrong corpus"); } } /** * Generates example queries if no example queries tab file is defined by the * user. */ private void generateExampleQueries(long corpusID) { // set in the annis.properties file. if (generateExampleQueries == EXAMPLE_QUERIES_CONFIG.TRUE) { queriesGenerator.generateQueries(corpusID); } } /** * @return the generateExampleQueries */ public EXAMPLE_QUERIES_CONFIG isGenerateExampleQueries() { return generateExampleQueries; } /** * @param generateExampleQueries the generateExampleQueries to set */ public void setGenerateExampleQueries(EXAMPLE_QUERIES_CONFIG generateExampleQueries) { this.generateExampleQueries = generateExampleQueries; } /** * Counts nodes and operators of the AQL example query and writes it back to * the staging area. * * @param corpusID specifies the corpus, the analyze things. * */ private void analyzeAutoGeneratedQueries(long corpusID) { // read the example queries from the staging area List<ExampleQuery> exampleQueries = getJdbcTemplate().query("SELECT * FROM _" + EXAMPLE_QUERIES_TAB, new RowMapper<ExampleQuery>() { @Override public ExampleQuery mapRow(ResultSet rs, int i) throws SQLException { ExampleQuery eQ = new ExampleQuery(); eQ.setExampleQuery(rs.getString("example_query")); return eQ; } }); // count the nodes of the aql Query countExampleQueryNodes(exampleQueries); // fetch the operators getOperators(exampleQueries, "\\.(\\*)?|\\>|\\>\\*|_i_"); writeAmountOfNodesBack(exampleQueries); } /** * Maps example queries to integer, which represents the amount of nodes of * the aql query. * */ private void countExampleQueryNodes(List<ExampleQuery> exampleQueries) { for (ExampleQuery eQ : exampleQueries) { QueryData query = getAnnisDao().parseAQL(eQ.getExampleQuery(), null); int count = 0; for (List<QueryNode> qNodes : query.getAlternatives()) { count += qNodes.size(); } eQ.setNodes(count); } } /** * Writes the counted nodes and the used operators back to the staging area. * */ private void writeAmountOfNodesBack(List<ExampleQuery> exampleQueries) { StringBuilder sb = new StringBuilder(); for (ExampleQuery eQ : exampleQueries) { sb.append("UPDATE ").append("_").append(EXAMPLE_QUERIES_TAB).append(" SET "); sb.append("nodes=").append(String.valueOf(eQ.getNodes())); sb.append(" WHERE example_query='"); sb.append(eQ.getExampleQuery()).append("';\n"); sb.append("UPDATE ").append("_").append(EXAMPLE_QUERIES_TAB).append(" SET "); sb.append("used_ops='").append(String.valueOf(eQ.getUsedOperators())); sb.append("' WHERE example_query='"); sb.append(eQ.getExampleQuery()).append("';\n"); } getJdbcTemplate().execute(sb.toString()); } /** * Fetches operators used in the {@link ExampleQuery#getExampleQuery()} with a * given regex. * * @param exQueries Set the used operators property of each member. * @param regex The regex to search operators. */ private void getOperators(List<ExampleQuery> exQueries, String regex) { Pattern opsRegex = Pattern.compile(regex); for (ExampleQuery eQ : exQueries) { List<String> ops = new ArrayList<>(); Matcher m = opsRegex.matcher(eQ.getExampleQuery().replaceAll("\\s", "")); while (m.find()) { ops.add(m.group()); } eQ.setUsedOperators("{" + StringUtils.join(ops, ",") + "}"); } } /** * @return the queriesGenerator */ public QueriesGenerator getQueriesGenerator() { return queriesGenerator; } /** * @param queriesGenerator the queriesGenerator to set */ public void setQueriesGenerator(QueriesGenerator queriesGenerator) { this.queriesGenerator = queriesGenerator; } public DeleteCorpusDao getDeleteCorpusDao() { return deleteCorpusDao; } public void setDeleteCorpusDao(DeleteCorpusDao deleteCorpusDao) { this.deleteCorpusDao = deleteCorpusDao; } /** * Checks, if a already exists a corpus with the same name of the top level * corpus in the corpus.tab file. If this is the case an Exception is thrown * and the import is aborted. * * @throws * annis.administration.DefaultAdministrationDao.ConflictingCorpusException */ private void checkTopLevelCorpus() throws ConflictingCorpusException { String corpusName = getTopLevelCorpusFromTmpArea(); if (existConflictingTopLevelCorpus(corpusName)) { String msg = "There already exists a top level corpus with the name: " + corpusName; throw new ConflictingCorpusException(msg); } } private ANNISFormatVersion getANNISFormatVersion(String path) { File pathDir = new File(path); if (pathDir.isDirectory()) { // check for existance of "annis.version" file File versionFile = new File(pathDir, "annis.version"); if (versionFile.isFile() && versionFile.exists()) { try { // read the first line String firstLine = Files.readFirstLine(versionFile, Charsets.UTF_8); if ("3.3".equals(firstLine.trim())) { return ANNISFormatVersion.V3_3; } } catch (IOException ex) { log.warn("Could not read annis.version file", ex); } } else { // we have to distinguish between 3.1 and 3.2 File nodeTab = new File(pathDir, "node.tab"); if (nodeTab.isFile() && nodeTab.exists()) { try { String firstLine = Files.readFirstLine(nodeTab, Charsets.UTF_8); List<String> cols = Splitter.on('\t').splitToList(firstLine); if (cols.size() == 13) { return ANNISFormatVersion.V3_2; } else if (cols.size() == 10) { return ANNISFormatVersion.V3_1; } } catch (IOException ex) { log.warn("Could not read node.tab file", ex); } } } } return ANNISFormatVersion.UNKNOWN; } public static class ConflictingCorpusException extends AnnisException { public ConflictingCorpusException(String msg) { super(msg); } } public static class Offsets { private final long corpusID; private final long corpusPost; private final long nodeID; public Offsets(long corpusID, long corpusPost, long nodeID) { this.corpusID = corpusID; this.corpusPost = corpusPost; this.nodeID = nodeID; } public long getCorpusID() { return corpusID; } public long getCorpusPost() { return corpusPost; } public MapSqlParameterSource makeArgs() { return new MapSqlParameterSource().addValue(":offset_corpus_id", corpusID) .addValue(":offset_corpus_post", corpusPost).addValue(":offset_node_id", nodeID); } } }