marytts.tools.dbselection.DatabaseSelector.java Source code

Java tutorial

Introduction

Here is the source code for marytts.tools.dbselection.DatabaseSelector.java

Source

/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.dbselection;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.UnknownHostException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.Set;

import javax.sound.sampled.UnsupportedAudioFileException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.TreeWalker;
import org.xml.sax.SAXException;

import marytts.client.MaryClient;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.features.FeatureDefinition;
import marytts.server.Mary;
import marytts.util.Pair;
import marytts.util.dom.MaryDomUtils;
import marytts.util.http.Address;
import org.apache.commons.io.FileUtils;

/**
 * Main class to be run over a database for selection
 * 
 * @author Anna Hunecke
 *
 */
public class DatabaseSelector {

    //the locale given with -locale; passed to the DBHandler constructor and
    //used to build the default feature definition file name
    private static String locale;
    //the feature definition for the feature vectors
    public static FeatureDefinition featDef;
    //the file containing the feature definition
    private static String featDefFileName;
    //the file containing the coverage data needed to initialise the algorithm
    private static String initFileName;
    //the directory to print the selection results to
    private static String selectionDirName;
    //the config file for the coverage definition
    private static String covDefConfigFileName;
    //the stop criterion (as string)
    private static String stopCriterion;
    //the log file to log the result to
    private static String overallLogFile;
    //if true, feature vectors are kept in memory
    private static boolean holdVectorsInMemory;
    //if true, print more information to command line
    private static boolean verbose;
    //if true, print a table containing the coverage 
    //development over time
    private static boolean logCovDevelopment;
    //ids of the selected sentences; created in main2 and handed to
    //SelectionFunction.select and storeResult
    private static Set<Integer> selectedIdSents;
    //ids of the unwanted sentences; created in main2 and handed to
    //SelectionFunction.select
    private static Set<Integer> unwantedIdSents;

    //name given with -tableName; forwarded to the DBHandler when not null
    private static String selectedSentencesTableName;
    //free-text description given with -tableDescription (default: empty string)
    private static String tableDescription;
    // mySql database
    protected static DBHandler wikiToDB;
    //connection parameters given with -mysqlHost, -mysqlDB, -mysqlUser, -mysqlPasswd
    private static String mysqlHost;
    private static String mysqlDB;
    private static String mysqlUser;
    private static String mysqlPasswd;
    //if true, sentence ids are fetched with the condition "reliable=true";
    //readArgs always sets this to true (the -reliableOnly switch is commented out)
    private static boolean considerOnlyReliableSentences;

    /**
     * Main method to be run from the directory where the data is.
     * Expects already computed unit features in directory unitfeatures
     * 
     * @param args the command line args (see printUsage for details)
     */
    public static void main(String[] args) throws Exception {
        // Thin command line entry point: all work is delegated to main2,
        // and any exception thrown there propagates to the caller.
        main2(args);
    }

    /**
     * Main method to be run from the directory where the data is.
     * Expects already computed unit features in directory unitfeatures.
     * 
     * @param args the command line args (see printUsage for details)
     * 
     * @throws Exception if the arguments or the stop criterion are invalid,
     *         a log or feature definition file cannot be opened, statistics
     *         cannot be printed, or the database connection fails
     */
    public static void main2(String[] args) throws Exception {
        System.out.println("Starting Database Selection...");

        long time = System.currentTimeMillis();

        // A single timestamp names both the per-day directory and the per-run files.
        Date date = new Date();
        DateFormat fullDate = new SimpleDateFormat("dd_MM_yyyy_HH_mm_ss");
        DateFormat day = new SimpleDateFormat("dd_MM_yyyy");
        String dateString = fullDate.format(date);
        String dateDir = day.format(date);

        System.out.println("Reading arguments ...");
        StringBuffer logBuf = new StringBuffer();
        if (!readArgs(args, logBuf)) {
            throw new Exception("Something wrong with the arguments.");
        }

        //make sure the stop criterion is allright
        SelectionFunction selFunc = new SelectionFunction();
        if (!selFunc.stopIsOkay(stopCriterion)) {
            System.out.println("Stop criterion format is wrong: " + stopCriterion);
            printUsage();
            throw new Exception("Stop criterion format is wrong: " + stopCriterion);
        }

        //make various dirs; mkdirs() also creates missing parent directories,
        //a failure here surfaces below when the log file cannot be opened
        File selectionDir = new File(selectionDirName);
        if (!selectionDir.exists()) {
            selectionDir.mkdirs();
        }
        File dateDirFile = new File(selectionDirName + dateDir);
        if (!dateDirFile.exists()) {
            dateDirFile.mkdirs();
        }

        //open log file
        String logFileName = selectionDirName + dateDir + "/selectionLog_" + dateString + ".txt";
        PrintWriter logOut;
        try {
            logOut = new PrintWriter(new BufferedWriter(new FileWriter(new File(logFileName))), true);
        } catch (Exception e) {
            e.printStackTrace();
            throw new Exception("Error opening logfile", e);
        }

        // From here on the log file is open: close it on every exit path.
        try {
            //print date and arguments to log file
            logOut.println("Date: " + dateString);
            logOut.println(logBuf.toString());

            wikiToDB = new DBHandler(locale);

            // Check if name of selectedSentencesTable has to be changed
            if (selectedSentencesTableName != null) {
                wikiToDB.setSelectedSentencesTableName(selectedSentencesTableName);
            } else {
                // report the name the handler actually uses (the field is null in this branch)
                System.out.println(
                        "Current selected sentences table name = " + wikiToDB.getSelectedSentencesTableName());
            }

            if (!wikiToDB.createDBConnection(mysqlHost, mysqlDB, mysqlUser, mysqlPasswd)) {
                // connection did not succeed
                System.out.println(
                        "\nERROR: Problems with connection to the DB, please check the mysql parameters.");
                throw new Exception("ERROR: Problems with connection to the DB, please check the mysql parameters.");
            }

            // The connection is open: release it even if the selection fails.
            try {
                /* Read in the feature definition */
                System.out.println("\nLoading feature definition...");
                BufferedReader uttFeats = null;
                try {
                    uttFeats = new BufferedReader(
                            new InputStreamReader(new FileInputStream(new File(featDefFileName)), "UTF-8"));
                    featDef = new FeatureDefinition(uttFeats, false);
                    System.out.println(
                            "TARGETFEATURES:" + featDef.getNumberOfFeatures() + " =  " + featDef.getFeatureNames());
                } catch (Exception e) {
                    e.printStackTrace();
                    throw new Exception("Error opening featureDefinition file", e);
                } finally {
                    if (uttFeats != null) {
                        try {
                            uttFeats.close();
                        } catch (IOException ignored) {
                            // nothing useful can be done if closing a reader fails
                        }
                    }
                }

                System.out.println("Getting a list of ids for all the sentences in the DB...");
                System.out.println("(if the number of sentences is large, this can take a while)");
                System.out.println();
                // The same condition is used for loading vectors and for reading the coverage file.
                String condition = null;
                if (considerOnlyReliableSentences) {
                    condition = "reliable=true";
                }
                CoverageFeatureProvider cfp;
                if (holdVectorsInMemory) {
                    /* Load the feature vectors from the database */
                    System.out.println(
                            "Will also load feature vectors into memory (increase memory if this fails)");
                    Pair<int[], byte[][]> pair = wikiToDB.getIdsAndFeatureVectors("dbselection", condition);
                    int[] sentenceIDs = pair.getFirst();
                    byte[][] vectorArray = pair.getSecond();
                    cfp = new InMemoryCFProvider(vectorArray, sentenceIDs);
                } else {
                    cfp = new DatabaseCFProvider(wikiToDB, condition);
                }

                /* Initialise the coverage definition */
                System.out.println("\nInitiating coverage...");
                CoverageDefinition covDef = new CoverageDefinition(featDef, cfp, covDefConfigFileName);

                // If the selectedSentencesTable is new, (does not exist) then a new table
                // will be created, the selected field in the dbselection table will be initialised to selected=false. 
                // The sentences already marked in this db as unwanted=true will be kept. 
                wikiToDB.createSelectedSentencesTable(stopCriterion, featDefFileName, covDefConfigFileName);
                // With the information provided by the user
                wikiToDB.setTableDescription(wikiToDB.getSelectedSentencesTableName(), tableDescription,
                        stopCriterion, featDefFileName, covDefConfigFileName);

                long startTime = System.currentTimeMillis();
                File covSetFile = new File(initFileName);
                boolean readCovFromFile = covSetFile.exists();
                if (!readCovFromFile) {
                    //coverage has to be initialised
                    covDef.initialiseCoverage();
                    System.out.println("\nWriting coverage to file " + initFileName);
                    covDef.writeCoverageBin(initFileName);
                } else {
                    int[] idSentenceList = wikiToDB.getIdListOfType("dbselection", condition);
                    covDef.readCoverageBin(initFileName, idSentenceList);
                }

                /* add already selected sentences to cover */
                System.out.println("\nAdd to cover already selected sentences marked as unwanted=false.");
                selectedIdSents = new LinkedHashSet<Integer>();
                addSelectedSents(selectedSentencesTableName, covDef);

                /* remove unwanted sentences from basename list */
                System.out.println("\nRemoving selected sentences marked as unwanted=true.");
                unwantedIdSents = new LinkedHashSet<Integer>();
                removeUnwantedSentences(selectedSentencesTableName);

                long startDuration = System.currentTimeMillis() - startTime;
                if (verbose) {
                    System.out.println("Startup took " + startDuration + " milliseconds");
                }
                logOut.println("Startup took " + startDuration + " milliseconds");

                /* print text corpus statistics */
                if (!readCovFromFile) {
                    //only print if we did not read from file
                    String statsFileName = selectionDirName + "textcorpus_distribution.txt";
                    System.out.println("Printing text corpus statistics to " + statsFileName + "...");
                    PrintWriter out = null;
                    try {
                        out = new PrintWriter(new FileWriter(new File(statsFileName)), true);
                        covDef.printTextCorpusStatistics(out);
                    } catch (Exception e) {
                        e.printStackTrace();
                        throw new Exception("Error printing statistics", e);
                    } finally {
                        // out is still null when the file could not be opened
                        if (out != null) {
                            out.close();
                        }
                    }
                }

                //print settings of the coverage definition to log file 
                covDef.printSettings(logOut);

                /* Start the algorithm */
                System.out.println("\nSelecting sentences...");

                // If it is not already running (could happen when SynthesisScriptGUI is used)
                // Start builtin MARY TTS in order to get and save the transcription 
                // of the selected sentences (selected_text_transcription.log)
                if (Mary.currentState() == Mary.STATE_OFF) {
                    System.out.print("Starting builtin MARY TTS...");
                    Mary.startup();
                    System.out.println(" MARY TTS started.");
                }

                selFunc.select(selectedIdSents, unwantedIdSents, covDef, logOut, cfp, verbose, wikiToDB);

                /* Store list of selected files */
                String resultFileName = selectionDirName + dateDir + "/selectionResult_" + dateString + ".txt";
                storeResult(resultFileName, selectedIdSents);

                /* print statistics */
                System.out.println("Printing selection distribution and table...");
                String disFile = selectionDirName + dateDir + "/selectionDistribution_" + dateString + ".txt";
                String devFile = selectionDirName + dateDir + "/selectionDevelopment_" + dateString + ".txt";
                try {
                    covDef.printSelectionDistribution(disFile, devFile, logCovDevelopment);
                } catch (Exception e) {
                    e.printStackTrace();
                    throw new Exception("Error printing statistics", e);
                }

                if (overallLogFile != null) {
                    //append results to end of overall log file
                    PrintWriter overallLogOut = new PrintWriter(
                            new OutputStreamWriter(new FileOutputStream(new File(overallLogFile), true), "UTF-8"),
                            true);
                    try {
                        overallLogOut
                                .println("*******************************\n" + "Results for " + dateString + ":");
                        overallLogOut.println("number of basenames " + cfp.getNumSentences());
                        overallLogOut.println("Stop criterion " + stopCriterion);
                        covDef.printResultToLog(overallLogOut);
                    } finally {
                        overallLogOut.close();
                    }
                }

                //print timing information
                long elapsedTime = System.currentTimeMillis() - time;
                double minutes = (double) elapsedTime / (double) 1000 / (double) 60;
                System.out.println("Selection took " + minutes + " minutes(" + elapsedTime + " milliseconds)");
                logOut.println("Selection took " + minutes + " minutes (" + elapsedTime + " milliseconds)");
                logOut.flush();
            } finally {
                wikiToDB.closeDBConnection();
            }
            System.out.println("All done!");
        } finally {
            logOut.close();
        }

    }

    /**
     * Read and check the command line arguments
     * 
     * @param args the arguments
     * @param log a StringBuffer for logging
     * @return true if args can be parsed and all essential args are there,
     *         false otherwise 
     */
    private static boolean readArgs(String[] args, StringBuffer log) throws Exception {
        //initialise default values
        String currentDir = System.getProperty("user.dir");
        String maryBaseDir = System.getenv("MARY_BASE");
        System.out.println("Current directory: " + currentDir + "  MARY_BASE=" + maryBaseDir);

        locale = null;
        selectionDirName = null;
        initFileName = null;
        covDefConfigFileName = null;
        featDefFileName = null;
        overallLogFile = null;
        holdVectorsInMemory = true;
        verbose = false;
        logCovDevelopment = false;
        mysqlHost = null;
        mysqlDB = null;
        mysqlUser = null;
        mysqlPasswd = null;
        selectedSentencesTableName = null;
        tableDescription = "";
        // It is currently not possible to use unreliable sentences, so there is
        // no command line switch for this setting.
        considerOnlyReliableSentences = true;
        stopCriterion = null;

        int i = 0;

        //loop over args
        while (i < args.length) {
            String option = args[i];

            // --- switches without a value ---
            if (option.equals("-vectorsOnDisk")) {
                holdVectorsInMemory = false;
                log.append("vectorsOnDisk\n");
                System.out.println("  vectorsOnDisk");
                i++;
                continue;
            }
            if (option.equals("-verbose")) {
                verbose = true;
                log.append("verbose\n");
                System.out.println("  verbose");
                i++;
                continue;
            }
            if (option.equals("-logCoverageDevelopment")) {
                logCovDevelopment = true;
                log.append("logCoverageDevelopment\n");
                System.out.println("  logCoverageDevelopment");
                i++;
                continue;
            }

            // --- the stop criterion: all following tokens up to the next "-option" ---
            if (option.equals("-stop")) {
                StringBuilder tmp = new StringBuilder();
                i++;
                while (args.length > i && !args[i].startsWith("-")) {
                    tmp.append(args[i] + " ");
                    i++;
                }
                stopCriterion = tmp.toString();
                log.append("stop criterion : " + stopCriterion + "\n");
                System.out.println("  stop criterion : " + stopCriterion);
                continue;
            }

            // --- options followed by exactly one value ---
            if (option.equals("-locale")) {
                String value = requireOptionValue(args, i, "No locale.");
                if (value == null) {
                    return false;
                }
                locale = value;
                recordOption(log, "locale : ", "  locale : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-mysqlHost")) {
                String value = requireOptionValue(args, i, "No mysqlHost.");
                if (value == null) {
                    return false;
                }
                mysqlHost = value;
                recordOption(log, "mysqlHost : ", "  mysqlHost : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-mysqlDB")) {
                String value = requireOptionValue(args, i, "No mysqlDB.");
                if (value == null) {
                    return false;
                }
                mysqlDB = value;
                recordOption(log, "mysqlDB : ", "  mysqlDB : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-mysqlUser")) {
                String value = requireOptionValue(args, i, "No mysqlUser.");
                if (value == null) {
                    return false;
                }
                mysqlUser = value;
                recordOption(log, "mysqlUser : ", "  mysqlUser : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-mysqlPasswd")) {
                String value = requireOptionValue(args, i, "No mysqlPasswd.");
                if (value == null) {
                    return false;
                }
                mysqlPasswd = value;
                // never echo the password itself to the console or the log file
                recordOption(log, "mysqlPasswd : ", "  mysqlPasswd : ", "********");
                i += 2;
                continue;
            }
            if (option.equals("-featDef")) {
                String value = requireOptionValue(args, i, "No featDef file");
                if (value == null) {
                    return false;
                }
                featDefFileName = value;
                recordOption(log, "FeatDefFileName : ", "  FeatDefFileName : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-initFile")) {
                String value = requireOptionValue(args, i, "No initFile");
                if (value == null) {
                    return false;
                }
                initFileName = value;
                recordOption(log, "initFile : ", "  initFile : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-tableName")) {
                String value = requireOptionValue(args, i, "No selectedSentencesTable name");
                if (value == null) {
                    return false;
                }
                selectedSentencesTableName = value;
                recordOption(log, "selectedSentencesTable name : ", "  selectedSentencesTable name: ", value);
                i += 2;
                continue;
            }
            if (option.equals("-tableDescription")) {
                String value = requireOptionValue(args, i, "No tableDescription");
                if (value == null) {
                    return false;
                }
                tableDescription = value;
                recordOption(log, "tableDescription : ", "  tableDescription: ", value);
                i += 2;
                continue;
            }
            if (option.equals("-selectionDir")) {
                String value = requireOptionValue(args, i, "No selectionDir");
                if (value == null) {
                    return false;
                }
                if (value.length() == 0) {
                    // an empty directory name is as good as a missing one
                    System.out.println("No selectionDir");
                    printUsage();
                    return false;
                }
                //make sure we have a slash at the end; checking for the separator
                //itself also covers names such as "." or "run_" that do not end
                //in a letter or digit
                if (value.endsWith("/") || value.endsWith(File.separator)) {
                    selectionDirName = value;
                } else {
                    selectionDirName = value + "/";
                }
                recordOption(log, "selectionDir : ", "  selectionDir : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-coverageConfig")) {
                String value = requireOptionValue(args, i, "No coverageConfig");
                if (value == null) {
                    return false;
                }
                covDefConfigFileName = value;
                recordOption(log, "coverageConfig : ", "  coverageConfig : ", value);
                i += 2;
                continue;
            }
            if (option.equals("-overallLog")) {
                String value = requireOptionValue(args, i, "No overall log file");
                if (value == null) {
                    return false;
                }
                overallLogFile = value;
                recordOption(log, "overallLogFile : ", "  overallLogFile : ", value);
                i += 2;
                continue;
            }

            // anything else is skipped, but no longer silently
            System.out.println("  Ignoring unknown argument: " + option);
            i++;
        }
        System.out.println();

        // Check every essential value itself instead of counting options: a
        // repeated option must not make up for a missing one.
        if (locale == null || mysqlHost == null || mysqlDB == null || mysqlUser == null || mysqlPasswd == null
                || selectedSentencesTableName == null) {
            //not all essential arguments were given
            System.out.println(
                    "You must at least specify locale, mysql (host,user,paswd,DB), selectedSentencesTableName");
            printUsage();
            return false;
        }

        if (stopCriterion == null) {
            stopCriterion = "numSentences 90 simpleDiphones simpleProsody";
        }

        if (selectionDirName == null) {
            selectionDirName = currentDir + "/selection/";
        }
        if (initFileName == null) {
            initFileName = currentDir + "/init.bin";
        }
        if (overallLogFile == null) {
            overallLogFile = currentDir + "/overallLog.txt";
        }

        if (featDefFileName == null) {
            // check first if there exists one in the current directory
            // if not ask the user to provide one, it should have been automatically generated by the FeatureMaker in previous step
            // See: http://mary.opendfki.de/wiki/NewLanguageSupport step 5
            System.out.println("Checking if there is [locale]_featureDefinition.txt in the current directory");
            File feaDef = new File(currentDir + "/" + locale + "_featureDefinition.txt");
            if (feaDef.exists()) {
                System.out.println("Using " + locale + "_featureDefinition.txt in current directory.");
                featDefFileName = currentDir + "/" + locale + "_featureDefinition.txt";
            } else {
                System.out.println(
                        "Please provide a [locale]_featureDefinition.txt, it should have been generated by the FeatureMaker. \n"
                                + " See: http://mary.opendfki.de/wiki/NewLanguageSupport step 5.");
                // without a feature definition the selection cannot run
                return false;
            }
        }

        if (covDefConfigFileName == null) {
            // check if there is already a covDef.config file in the current directory
            // if not then copy the default covDef.config from jar archive resource (marytts/tools/dbselection/covDef.config)
            System.out.println("\nChecking if there is already a covDef.config in the current directory");
            File covDef = new File(currentDir + "/covDef.config");
            if (covDef.exists()) {
                System.out.println("Using covDef.config in current directory.");
            } else {
                System.out.println("Copying default covDef.config file from archive");
                if (DatabaseSelector.class.getResource("covDef.config") == null) {
                    // getResourceAsStream would return null and the copy would fail with a NullPointerException
                    System.out.println(
                            "Default covDef.config not found in archive, please provide one with -coverageConfig.");
                    return false;
                }
                FileUtils.copyInputStreamToFile(DatabaseSelector.class.getResourceAsStream("covDef.config"),
                        covDef);
            }
            covDefConfigFileName = currentDir + "/covDef.config";
            System.out.println("covDefConfigFileName = " + covDefConfigFileName);

        }

        return true;
    }

    /**
     * Fetch the value that follows the option at the given index.
     * If there is none, print the given message and the usage.
     * 
     * @param args the command line args
     * @param optionIndex index of the option whose value is wanted
     * @param missingMessage message to print if the value is missing
     * @return the value, or null if the option is the last argument
     */
    private static String requireOptionValue(String[] args, int optionIndex, String missingMessage) {
        if (args.length > optionIndex + 1) {
            return args[optionIndex + 1];
        }
        System.out.println(missingMessage);
        printUsage();
        return null;
    }

    /**
     * Append an option and its value to the log buffer and echo it to standard out.
     * 
     * @param log the buffer collecting the settings for the log file
     * @param logLabel text put in front of the value in the log
     * @param consoleLabel text put in front of the value on standard out
     * @param value the value to report
     */
    private static void recordOption(StringBuffer log, String logLabel, String consoleLabel, String value) {
        log.append(logLabel + value + "\n");
        System.out.println(consoleLabel + value);
    }

    /**
     * Print usage of main method
     * to standard out
     */
    private static void printUsage() {

        // Only options that readArgs actually parses are listed here.
        System.out.println("\nUsage: java DatabaseSelector -locale language -mysqlHost host -mysqlUser user -mysqlPasswd passwd -mysqlDB wikiDB\n"
                + "-tableName selectedSentencesTableName \n"
                + " [-stop stopCriterion]\n"
                + " [-featDef file -coverageConfig file]\n"
                + " [-initFile file]\n"
                + " [-tableDescription a brief description of the table ]\n"
                + " [-vectorsOnDisk -overallLog file -selectionDir dir -logCoverageDevelopment -verbose]\n"
                + " Arguments:\n"
                + " -tableName selectedSentencesTableName : The name of a new selection set, change this name when\n"
                + "     generating several selection sets. FINAL name will be: \"locale_name_selectedSenteces\". \n"
                + "     where name is the name provided for the selected sentences table.\n"
                + " -tableDescription : short description of the selected sentences table.\n"
                + "     Default: empty\n"
                + " -featDef file : The feature definition for the features\n"
                + "     Default: [locale]_featureDefinition.txt for example for US English: en_US_featureDefinition.txt\n"
                + "             this file is automatically created in previous steps by the FeatureMaker.\n"
                + " -stop stopCriterion : which stop criterion to use. There are five stop criteria. \n"
                + "     They can be used individually or can be combined:\n"
                + "     - numSentences n : selection stops after n sentences\n"
                + "     - simpleDiphones : selection stops when simple diphone coverage has reached maximum\n"
                + "     - simpleProsody : selection stops when simple prosody coverage has reached maximum\n"
                + "     Default: \"numSentences 90 simpleDiphones simpleProsody\"\n"
                + " -coverageConfig file : The config file for the coverage definition. \n"
                + "     Default: there is a default coverage config file in MARY_BASE/resources/marytts/tools/dbselection/covDef.config\n"
                + "              this file will be copied to the current directory if no file is provided.\n"
                + " -initFile file : The file containing the coverage data needed to initialise the algorithm.\n"
                + "     Default: /current_dir/init.bin\n"
                + " -overallLog file : Log file for all runs of the program: date, settings and results of the current\n"
                + "     run are appended to the end of the file. This file is needed if you want to analyse your results \n"
                + "     with the ResultAnalyser later.\n"
                + "     Default: /current_dir/overallLog.txt\n"
                + " -selectionDir dir : the directory where all selection data is stored.\n"
                + "     Default: /current_dir/selection\n"
                + " -vectorsOnDisk: if this option is given, the feature vectors are not loaded into memory during\n"
                + "     the run of the program. This notably slows down the run of the program!\n"
                + "     Default: no vectorsOnDisk\n"
                + " -logCoverageDevelopment : If this option is given, the coverage development over time is stored.\n"
                + "     Default: no logCoverageDevelopment\n"
                + " -verbose : If this option is given, there will be more output on the command line during the run of the program.\n"
                + "     Default: no verbose\n");

    }

    /**
     * Interactive manual check of the selected sentences.
     * <p>
     * The ids are obtained from wikiToDB.getIdListOfType for "dbselection" with the condition
     * "selected=true and unwanted=false". Each sentence is printed on standard output and the
     * answer is read from standard input:
     * <ul>
     * <li>"y": the sentence is written to ./selected.log and, together with its transcription,
     * to ./selected_text_transcription.log</li>
     * <li>"n": the "unwanted" field of the sentence record is set to true through wikiToDB and the
     * sentence is written to ./unwanted.log</li>
     * <li>any other answer, or end of input: the check stops</li>
     * </ul>
     * The three log files are flushed and closed on every exit path, so nothing that has been
     * answered so far is lost. Any exception is printed on standard output and not propagated.
     */
    private static void checkSelectedSentences() {
        // System.in is deliberately not closed here: it is not owned by this method.
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

        PrintWriter selectedLog = null;
        PrintWriter selectedTraLog = null;
        PrintWriter unwantedLog = null;

        try {
            System.out.println("\nChecking selected sentences whether they are wanted or not.");
            System.out.println(" selected sentences will be saved in ./selected.log");
            selectedLog = new PrintWriter(new FileWriter(new File("./selected.log")));

            System.out.println(
                    " selected sentences and transcriptions will be saved in  ./selected_text_transcription.log");
            selectedTraLog = new PrintWriter(new FileWriter(new File("./selected_text_transcription.log")));

            System.out.println(" unwanted sentences will be saved in ./unwanted.log");
            unwantedLog = new PrintWriter(new FileWriter(new File("./unwanted.log")));

            int sel[] = wikiToDB.getIdListOfType("dbselection", "selected=true and unwanted=false");

            if (sel != null) {
                // checking selected sentences
                System.out.println(
                        " Select \"y\" for marking sentence as \"wanted\" otherwise \"n\" . Press any other key to finish: \n");
                for (int i = 0; i < sel.length; i++) {
                    String str = wikiToDB.getSelectedSentence(wikiToDB.getSelectedSentencesTableName(), sel[i]);
                    System.out.print("id=" + sel[i] + ":  " + str + "\n  Wanted?(y/n):");

                    // readLine() returns null at end of input; the constant-first comparisons
                    // below are null-safe, so that case falls through to "finish".
                    String answer = br.readLine();
                    if ("n".equals(answer)) {
                        wikiToDB.setSentenceRecord(sel[i], "unwanted", true);
                        unwantedLog.println(sel[i] + " " + str);
                    } else if ("y".equals(answer)) {
                        selectedLog.println(sel[i] + " " + str);

                        selectedTraLog.println(sel[i] + " " + str);
                        selectedTraLog.println(sel[i] + " <" + SelectionFunction.transcribe(str, locale) + ">");
                    } else {
                        // any other answer ends the session; the logs are closed in finally
                        break;
                    }
                }
            } else {
                System.out.println("There is no selected sentences in the DB.");
            }

        } catch (Exception e) {
            System.out.println(e);
        } finally {
            // Close (and thereby flush) whatever was opened, also when the loop ran to its
            // end or an exception interrupted the session.
            if (unwantedLog != null) {
                unwantedLog.close();
            }
            if (selectedLog != null) {
                selectedLog.close();
            }
            if (selectedTraLog != null) {
                selectedTraLog.close();
            }
        }

    }

    /**
     * Feed the already selected sentences into the cover.
     * <p>
     * The ids are requested from wikiToDB for the selected-sentences table with the condition
     * "unwanted=false". For every id the feature vector is fetched, the cover is updated with it
     * and the id is recorded in selectedIdSents. This method does not modify idSentenceList.
     *
     * @param tableName not read by this method; the table name is taken from
     *            wikiToDB.getSelectedSentencesTableName() instead
     * @param covDef the cover to update
     * @throws Exception whatever the wikiToDB or covDef calls throw
     */
    private static void addSelectedSents(String tableName, CoverageDefinition covDef) throws Exception {
        if (verbose) {
            System.out.println("\nAdding previously selected sentences ...");
        }

        final String selectedTable = wikiToDB.getSelectedSentencesTableName();
        final int[] previouslySelected = wikiToDB.getIdListOfSelectedSentences(selectedTable, "unwanted=false");

        // nothing stored yet: report and leave
        if (previouslySelected == null) {
            System.out.println("There is no already selected sentences to add to the list.");
            return;
        }

        for (int sentenceId : previouslySelected) {
            // extend the cover with the features of this sentence ...
            byte[] features = wikiToDB.getFeatures(sentenceId);
            covDef.updateCover(features);

            // ... and remember the sentence as selected
            selectedIdSents.add(Integer.valueOf(sentenceId));
        }

        System.out.println("Added to cover " + previouslySelected.length + " selected sentences");
    }

    /**
     * Collect the ids of the sentences marked as unwanted.
     * <p>
     * The ids are requested from wikiToDB for the selected-sentences table with the condition
     * "unwanted=true" and each of them is added to unwantedIdSents. Neither the database nor
     * idSentenceList is changed by this method.
     *
     * @param tableName not read by this method; the table name is taken from
     *            wikiToDB.getSelectedSentencesTableName() instead
     * @throws Exception whatever the wikiToDB calls throw
     */
    private static void removeUnwantedSentences(String tableName) throws Exception {
        if (verbose) {
            System.out.println("\nRemoving unwanted sentences ...");
        }

        final String selectedTable = wikiToDB.getSelectedSentencesTableName();
        final int[] unwantedIds = wikiToDB.getIdListOfSelectedSentences(selectedTable, "unwanted=true");

        // no unwanted sentences recorded: report and leave
        if (unwantedIds == null) {
            System.out.println("There is no unwanted sentences to remove.");
            return;
        }

        // The "unwanted" flag itself is not set here (per the original note, that is already
        // done when sentences are marked with the SynthesisScriptGUI); only the ids are kept.
        for (int sentenceId : unwantedIds) {
            unwantedIdSents.add(Integer.valueOf(sentenceId));
        }

        System.out.println("Removed " + unwantedIds.length + " unwanted sentences.");
    }

    /**
     * Write the given ids to a text file, one id per line, in the iteration order of the set.
     * An existing file with the same name is overwritten.
     *
     * @param filename the file to print to
     * @param selected the ids to store
     * @throws Error if the file cannot be opened (the underlying exception is attached as the
     *             cause) or if writing to it fails
     */
    private static void storeResult(String filename, Set<Integer> selected) {

        PrintWriter out;
        try {
            out = new PrintWriter(new FileWriter(new File(filename)));
        } catch (Exception e) {
            // chain the cause instead of only dumping it, so the reason travels with the Error
            throw new Error("Error storing result", e);
        }
        try {
            for (int sel : selected) {
                out.println(sel);
            }
            // PrintWriter never throws on write failures; checkError() flushes the stream and
            // reports whether any write went wrong.
            if (out.checkError()) {
                throw new Error("Error storing result");
            }
        } finally {
            // release the file handle even if the iteration above fails
            out.close();
        }
    }

}