Java tutorial
/* * Copyright 2012 SFB 632. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis.administration; import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Map; import org.springframework.transaction.annotation.Transactional; import annis.AnnisRunnerException; import annis.CommonHelper; import annis.exceptions.AnnisException; import annis.service.objects.ImportJob; import annis.utils.ANNISFormatHelper; import com.google.common.base.Joiner; import com.google.common.collect.Multimap; import com.google.common.io.ByteStreams; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.util.ArrayList; import java.util.Enumeration; import java.util.HashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.Map.Entry; import java.util.Set; import java.util.zip.ZipEntry; import java.util.zip.ZipException; import java.util.zip.ZipFile; import javax.mail.internet.AddressException; import javax.mail.internet.InternetAddress; import org.apache.commons.io.FileUtils; import org.apache.commons.io.output.FileWriterWithEncoding; import org.apache.commons.mail.EmailException; import org.apache.commons.mail.SimpleEmail; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * * @author Thomas Krause <krauseto@hu-berlin.de> */ public class CorpusAdministration { private AdministrationDao administrationDao; private DeleteCorpusDao deleteCorpusDao; private SchemeFixer schemeFixer; private String statusMailSender; private static final Logger log = LoggerFactory.getLogger(CorpusAdministration.class); public CorpusAdministration() { } @Transactional(readOnly = false) public void deleteCorpora(List<Long> ids) { // check if corpus exists List<Long> corpora = administrationDao.listToplevelCorpora(); for (long id : ids) { if (!corpora.contains(id)) { throw new AnnisRunnerException("Corpus does not exist (or is not a top-level corpus): " + id, 51); } } log.info("Deleting corpora: " + ids); deleteCorpusDao.deleteCorpora(ids, true); log.info("Finished deleting corpora: " + ids); } public void cleanupData() { administrationDao.cleanupData(); } public void initializeDatabase(String host, String port, String database, String user, String password, String defaultDatabase, String superUser, String superPassword, boolean useSSL, String pgSchema) { log.info("initializing database"); administrationDao.initializeDatabase(host, port, database, user, password, defaultDatabase, superUser, superPassword, useSSL, pgSchema); // write database information to property file writeDatabasePropertiesFile(host, port, database, user, password, useSSL, pgSchema); // create tables and other stuff that is handled by the scheme fixer if (schemeFixer != null) { schemeFixer.checkAndFix(); } } /** * Imports several corpora and catches a possible thrown * {@link DefaultAdministrationDao.ConflictingCorpusException} when the * overwrite flag is set to false. * * * @param overwrite If set to false, a conflicting corpus is not silently * reimported. * @param aliasName An common alias name for all imported corpora or null * @param statusEmailAdress an email adress for informating the admin about * statuses * @param waitForOtherTasks If true wait for other imports to finish, if false * abort the import. * @param paths Valid pathes to corpora. * @return True if all corpora where imported successfully. */ public ImportStatus importCorporaSave(boolean overwrite, String aliasName, String statusEmailAdress, boolean waitForOtherTasks, List<String> paths) { // init the import stats. From the beginning everything is ok ImportStatus importStats = new ImportStatsImpl(); importStats.setStatus(true); // check if database scheme is ok if (schemeFixer != null) { schemeFixer.checkAndFix(); } List<File> roots = new LinkedList<>(); for (String path : paths) { File f = new File(path); if (f.isFile()) { // might be a ZIP-file try (ZipFile zip = new ZipFile(f);) { // get the names of all corpora included in the ZIP file // in order to get a folder name Map<String, ZipEntry> corpora = ANNISFormatHelper.corporaInZipfile(zip); // unzip and add all resulting corpora to import list log.info("Unzipping " + f.getPath()); File outDir = createZIPOutputDir(Joiner.on(", ").join(corpora.keySet())); roots.addAll(unzipCorpus(outDir, zip)); } catch (ZipException ex) { log.error("" + f.getAbsolutePath() + " might not be a valid ZIP file and will be ignored", ex); } catch (IOException ex) { log.error("IOException when importing file " + f.getAbsolutePath() + ", will be ignored", ex); } } else { try { roots.addAll(ANNISFormatHelper.corporaInDirectory(f).values()); } catch (IOException ex) { log.error("Could not find any corpus in " + f.getPath(), ex); importStats.setStatus(false); importStats.addException(f.getAbsolutePath(), ex); } } } // end for each given path // import each corpus separately boolean anyCorpusImported = false; for (File r : roots) { try { log.info("Importing corpus from: " + r.getPath()); if (administrationDao.importCorpus(r.getPath(), aliasName, overwrite, waitForOtherTasks)) { log.info("Finished import from: " + r.getPath()); sendImportStatusMail(statusEmailAdress, r.getPath(), ImportJob.Status.SUCCESS, null); anyCorpusImported = true; } else { importStats.setStatus(false); sendImportStatusMail(statusEmailAdress, r.getPath(), ImportJob.Status.ERROR, null); } } catch (AdministrationDao.ConflictingCorpusException ex) { importStats.setStatus(false); importStats.addException(r.getPath(), ex); log.error("Error on conflicting top level corpus name for {}", r.getPath()); sendImportStatusMail(statusEmailAdress, r.getPath(), ImportJob.Status.ERROR, ex.getMessage()); } catch (org.springframework.transaction.CannotCreateTransactionException ex) { importStats.setStatus(false); importStats.addException(r.getPath(), ex); log.error("Postgres is not running or misconfigured"); } catch (Throwable ex) { importStats.setStatus(false); importStats.addException(r.getPath(), ex); log.error("Error on importing corpus", ex); sendImportStatusMail(statusEmailAdress, r.getPath(), ImportJob.Status.ERROR, ex.getMessage()); } } // end for each corpus if (anyCorpusImported) { administrationDao.analyzeParentFacts(); } return importStats; } /** * Extract the zipped ANNIS corpus files to an output directory. * * @param outDir The ouput directory. * @param zip ZIP-file to extract. * @return A list of root directories where the tab-files are located if * found, null otherwise. */ private List<File> unzipCorpus(File outDir, ZipFile zip) { List<File> rootDirs = new ArrayList<>(); Enumeration<? extends ZipEntry> zipEnum = zip.entries(); while (zipEnum.hasMoreElements()) { ZipEntry e = zipEnum.nextElement(); File outFile = new File(outDir, e.getName().replaceAll("\\/", "/")); if (e.isDirectory()) { if (!outFile.mkdirs()) { log.warn("Could not create output directory " + outFile.getAbsolutePath()); } } // end if directory else { if ("corpus.tab".equals(outFile.getName()) || "corpus.annis".equals(outFile.getName())) { rootDirs.add(outFile.getParentFile()); } if (!outFile.getParentFile().isDirectory()) { if (!outFile.getParentFile().mkdirs()) { { log.warn("Could not create output directory for file " + outFile.getAbsolutePath()); } } } try (FileOutputStream outStream = new FileOutputStream(outFile);) { ByteStreams.copy(zip.getInputStream(e), outStream); } catch (FileNotFoundException ex) { log.error(null, ex); } catch (IOException ex) { log.error(null, ex); } } // end else is file } // end for each entry in zip file return rootDirs; } public File createZIPOutputDir(String corpusName) { File outDir = new File(System.getProperty("user.home"), ".annis/zip-imports/" + CommonHelper.getSafeFileName(corpusName)); if (outDir.exists()) { try { // delete old data inside the corpus directory FileUtils.deleteDirectory(outDir); } catch (IOException ex) { log.warn("Could not recursivly delete the output directory", ex); } } if (!outDir.mkdirs()) { throw new IllegalStateException("Could not create directory " + outDir.getAbsolutePath()); } return outDir; } public static class ImportStatsImpl implements ImportStatus { boolean status = true; private final static String SEPERATOR = "--------------------------\n"; final Map<String, List<Throwable>> exceptions; public ImportStatsImpl() { exceptions = new HashMap<>(); } @Override public boolean getStatus() { return status; } @Override public List<Throwable> getThrowables() { List<Throwable> allThrowables = new ArrayList<>(); for (List<Throwable> l : exceptions.values()) { allThrowables.addAll(l); } return allThrowables; } @Override public List<Throwable> getThrowable(String corpusName) { return exceptions.get(corpusName); } @Override public void addException(String corpusName, Throwable ex) { if (!exceptions.containsKey(corpusName)) { exceptions.put(corpusName, new ArrayList<Throwable>()); } exceptions.get(corpusName).add(ex); } @Override public void setStatus(boolean status) { this.status = status; } @Override public void add(ImportStatus importStats) { if (importStats == null) { return; } status &= importStats.getStatus(); exceptions.putAll(importStats.getAllThrowable()); } @Override public List<Exception> getExceptions() { List<Exception> exs = new ArrayList<>(); if (exceptions != null) { for (List<Throwable> throwables : exceptions.values()) { for (Throwable throwable : throwables) { if (throwable instanceof Exception) { exs.add((Exception) throwable); } } } } return exs; } @Override public Map<String, List<Throwable>> getAllThrowable() { return this.exceptions; } @Override public String printMessages() { StringBuilder txtMessages = new StringBuilder(); for (Entry<String, List<Throwable>> e : exceptions.entrySet()) { txtMessages.append(SEPERATOR); txtMessages.append("Error in corpus: ").append(e.getKey()).append("\n"); txtMessages.append(SEPERATOR); for (Throwable th : e.getValue()) { Exception exception = (Exception) th; txtMessages.append(exception.getLocalizedMessage()).append("\n"); } } return txtMessages.toString(); } @Override public String printDetails() { StringBuilder details = new StringBuilder(); for (Entry<String, List<Throwable>> e : exceptions.entrySet()) { details.append(SEPERATOR); details.append("Error in corpus: ").append(e.getKey()).append("\n"); details.append(SEPERATOR); for (Throwable th : e.getValue()) { details.append(th.getLocalizedMessage()).append("\n"); StackTraceElement[] st = th.getStackTrace(); for (int i = 0; i < st.length; i++) { details.append(st[i].toString()); details.append("\n"); } } } return details.toString(); } @Override public String printType() { StringBuilder type = new StringBuilder(); for (Entry<String, List<Throwable>> e : exceptions.entrySet()) { String name = e.getKey().split("/")[e.getKey().split("/").length - 1]; type.append("(").append(name).append(": "); for (Throwable th : e.getValue()) { type.append(th.getClass().getSimpleName()).append(" "); } type.append(") "); } return type.toString(); } } public void sendImportStatusMail(String adress, String corpusPath, ImportJob.Status status, String additionalInfo) { if (adress == null || corpusPath == null) { return; } // check valid properties if (statusMailSender == null || statusMailSender.isEmpty()) { log.warn("Could not send status mail because \"annis.mail-sender\" " + "property was not configured in conf/annis-service-properties."); return; } try { SimpleEmail mail = new SimpleEmail(); List<InternetAddress> to = new LinkedList<>(); to.add(new InternetAddress(adress)); StringBuilder sbMsg = new StringBuilder(); sbMsg.append("Dear Sir or Madam,\n"); sbMsg.append("\n"); sbMsg.append("this is the requested status update to the ANNIS corpus import " + "you have started. Please note that this message is automated and " + "if you have any question regarding the import you have to ask the " + "administrator of the ANNIS instance directly.\n\n"); mail.setTo(to); if (status == ImportJob.Status.SUCCESS) { mail.setSubject("ANNIS import finished successfully (" + corpusPath + ")"); sbMsg.append("Status:\nThe corpus \"").append(corpusPath) .append("\" was successfully imported and can be used from now on.\n"); } else if (status == ImportJob.Status.ERROR) { mail.setSubject("ANNIS import *failed* (" + corpusPath + ")"); sbMsg.append("Status:\nUnfortunally the corpus \"").append(corpusPath) .append("\" could not be imported successfully. " + "You may ask the administrator of the ANNIS installation for " + "assistance why the corpus import failed.\n"); } else if (status == ImportJob.Status.RUNNING) { mail.setSubject("ANNIS import started (" + corpusPath + ")"); sbMsg.append("Status:\nThe import of the corpus \"").append(corpusPath).append("\" was started.\n"); } else if (status == ImportJob.Status.WAITING) { mail.setSubject("ANNIS import was scheduled (" + corpusPath + ")"); sbMsg.append("Status:\nThe import of the corpus \"").append(corpusPath) .append("\" was scheduled and is currently waiting for other imports to " + "finish. As soon as the previous imports are finished this import " + "job will be executed.\n"); } else { // we don't know how to handle this, just don't send a message return; } if (additionalInfo != null && !additionalInfo.isEmpty()) { sbMsg.append("Addtional information:\n"); sbMsg.append(additionalInfo).append("\n"); } sbMsg.append("\n\nSincerely yours,\n\nthe ANNIS import service."); mail.setMsg(sbMsg.toString()); mail.setHostName("localhost"); mail.setFrom(statusMailSender); mail.send(); log.info("Send status ({}) mail to {}.", new String[] { status.name(), adress }); } catch (AddressException | EmailException ex) { log.warn("Could not send mail: " + ex.getMessage()); } } public void sendCopyStatusMail(String adress, String origDBFile, ImportJob.Status status, String additionalInfo) { if (adress == null || origDBFile == null) { return; } // check valid properties if (statusMailSender == null || statusMailSender.isEmpty()) { log.warn("Could not send status mail because \"annis.mail-sender\" " + "property was not configured in conf/annis-service-properties."); return; } try { SimpleEmail mail = new SimpleEmail(); List<InternetAddress> to = new LinkedList<>(); to.add(new InternetAddress(adress)); StringBuilder sbMsg = new StringBuilder(); sbMsg.append("Dear Sir or Madam,\n"); sbMsg.append("\n"); sbMsg.append("this is the requested status update to the ANNIS corpus import " + "you have started. Please note that this message is automated and " + "if you have any question regarding the import you have to ask the " + "administrator of the ANNIS instance directly.\n\n"); mail.setTo(to); if (status == ImportJob.Status.SUCCESS) { mail.setSubject("ANNIS copy finished successfully (" + origDBFile + ")"); sbMsg.append("Status:\nThe corpora from \"").append(origDBFile) .append("\" were successfully imported and can be used from now on.\n"); } else if (status == ImportJob.Status.ERROR) { mail.setSubject("ANNIS copy *failed* (" + origDBFile + ")"); sbMsg.append("Status:\nUnfortunally the corpora from \"").append(origDBFile) .append("\" could not be imported successfully. " + "You may ask the administrator of the ANNIS installation for " + "assistance why the corpus import failed.\n"); } else if (status == ImportJob.Status.RUNNING) { mail.setSubject("ANNIS copy started (" + origDBFile + ")"); sbMsg.append("Status:\nThe import of the corpora from \"").append(origDBFile) .append("\" was started.\n"); } else if (status == ImportJob.Status.WAITING) { mail.setSubject("ANNIS copy was scheduled (" + origDBFile + ")"); sbMsg.append("Status:\nThe import of the corpora from \"").append(origDBFile) .append("\" was scheduled and is currently waiting for other imports to " + "finish. As soon as the previous imports are finished this copy " + "job will be executed.\n"); } else { // we don't know how to handle this, just don't send a message return; } if (additionalInfo != null && !additionalInfo.isEmpty()) { sbMsg.append("Addtional information:\n"); sbMsg.append(additionalInfo).append("\n"); } sbMsg.append("\n\nSincerely yours,\n\nthe ANNIS import service."); mail.setMsg(sbMsg.toString()); mail.setHostName("localhost"); mail.setFrom(statusMailSender); mail.send(); log.info("Send status ({}) mail to {}.", new String[] { status.name(), adress }); } catch (AddressException | EmailException ex) { log.warn("Could not send mail: " + ex.getMessage()); } } public boolean checkDatabaseSchemaVersion() { try { administrationDao.checkDatabaseSchemaVersion(); } catch (AnnisException ex) { return false; } return true; } /** * Imports several corpora. * * @param overwrite if false, a conflicting top level corpus is silently * skipped. * @param aliasName An common alias name for all imported corpora or null * @param statusEmailAdress If not null the email adress of the user who * started the import. * @param waitForOtherTasks If true wait for other imports to finish, if false * abort the import. * @param paths the paths to the corpora * @return True if all corpora where imported successfully. */ public ImportStatus importCorporaSave(boolean overwrite, String aliasName, String statusEmailAdress, boolean waitForOtherTasks, String... paths) { return importCorporaSave(overwrite, aliasName, statusEmailAdress, waitForOtherTasks, Arrays.asList(paths)); } public List<Map<String, Object>> listCorpusStats() { return administrationDao.listCorpusStats(); } public List<Map<String, Object>> listCorpusStats(File databaseProperties) { return administrationDao.listCorpusStats(databaseProperties); } public List<String> listUsedIndexes() { return administrationDao.listUsedIndexes(); } public List<String> listUnusedIndexes() { return administrationDao.listUnusedIndexes(); } public boolean copyFromOtherInstance(File dbProperties, boolean overwrite, String mail) { if (dbProperties.isFile() && dbProperties.canRead()) { // find the corpus paths List<Map<String, Object>> corpora = listCorpusStats(dbProperties); List<String> corpusPaths = new LinkedList<>(); for (Map<String, Object> c : corpora) { String sourcePath = (String) c.get("source_path"); if (sourcePath != null) { corpusPaths.add(sourcePath); } } if (corpusPaths.isEmpty()) { log.warn("No corpora found"); return true; } else { log.info("The following corpora will be imported:\n" + "---------------\n" + "{}\n" + "---------------\n", Joiner.on("\n").join(corpusPaths)); sendCopyStatusMail(mail, dbProperties.getAbsolutePath(), ImportJob.Status.RUNNING, "The following corpora will be imported:\n" + "---------------\n" + Joiner.on("\n").join(corpusPaths) + "\n" + "---------------\n"); // remember the corpus alias table Multimap<String, String> corpusAlias = administrationDao.listCorpusAlias(dbProperties); //import each corpus ImportStatus status = importCorporaSave(overwrite, null, null, false, corpusPaths); // report the successful or failure failed Set<String> successfullCorpora = new LinkedHashSet<>(corpusPaths); Set<String> failedCorpora = new LinkedHashSet<>(status.getAllThrowable().keySet()); successfullCorpora.removeAll(failedCorpora); log.info("copying corpus aliases"); for (Map.Entry<String, String> e : corpusAlias.entries()) { administrationDao.addCorpusAlias(e.getValue(), e.getKey()); } if (failedCorpora.isEmpty()) { log.info("All corpora imported without errors:\n" + "---------------\n" + "{}\n" + "---------------\n", Joiner.on("\n").join(successfullCorpora)); sendCopyStatusMail(mail, dbProperties.getAbsolutePath(), ImportJob.Status.SUCCESS, "All corpora imported without errors:\n" + "---------------\n" + Joiner.on("\n").join(corpusPaths) + "\n" + "---------------\n"); return true; } else { log.error( "Errors occured during import, not all corpora have been imported.\n" + "---------------\n" + "Success:\n" + "{}\n" + "---------------\n" + "Failed:\n" + "{}\n" + "---------------\n", Joiner.on("\n").join(successfullCorpora), Joiner.on("\n").join(failedCorpora)); sendCopyStatusMail(mail, dbProperties.getAbsolutePath(), ImportJob.Status.ERROR, "Errors occured during import, not all corpora have been imported.\n" + "---------------\n" + "Success:\n" + Joiner.on("\n").join(successfullCorpora) + "\n" + "---------------\n" + "Failed:\n" + Joiner.on("\n").join(failedCorpora) + "\n" + "---------------\n"); } } } else { log.error("Can not read the database configuration file {}", dbProperties.getAbsolutePath()); } return false; } ///// Helper protected void writeDatabasePropertiesFile(String host, String port, String database, String user, String password, boolean useSSL, String schema) { File file = new File(System.getProperty("annis.home") + "/conf", "database.properties"); try (BufferedWriter writer = new BufferedWriter(new FileWriterWithEncoding(file, "UTF-8"));) { writer.write("# database configuration\n"); writer.write("datasource.driver=org.postgresql.Driver\n"); writer.write("datasource.url=jdbc:postgresql://" + host + ":" + port + "/" + database + "\n"); writer.write("datasource.username=" + user + "\n"); writer.write("datasource.password=" + password + "\n"); writer.write("datasource.ssl=" + (useSSL ? "true" : "false") + "\n"); if (schema != null) { writer.write("datasource.schema=" + schema + "\n"); } } catch (IOException e) { log.error("Couldn't write database properties file", e); throw new FileAccessException(e); } log.info("Wrote database configuration to " + file.getAbsolutePath()); } ///// Getter / Setter public AdministrationDao getAdministrationDao() { return administrationDao; } public void setAdministrationDao(AdministrationDao administrationDao) { this.administrationDao = administrationDao; } public String getStatusMailSender() { return statusMailSender; } public void setStatusMailSender(String statusMailSender) { this.statusMailSender = statusMailSender; } public SchemeFixer getSchemeFixer() { return schemeFixer; } public void setSchemeFixer(SchemeFixer schemeFixer) { this.schemeFixer = schemeFixer; } public DeleteCorpusDao getDeleteCorpusDao() { return deleteCorpusDao; } public void setDeleteCorpusDao(DeleteCorpusDao deleteCorpusDao) { this.deleteCorpusDao = deleteCorpusDao; } }