com.joliciel.talismane.TalismaneConfigImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.joliciel.talismane.TalismaneConfigImpl.java

Source

///////////////////////////////////////////////////////////////////////////////
//Copyright (C) 2014 Joliciel Informatique
//
//This file is part of Talismane.
//
//Talismane is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//Talismane is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU Affero General Public License for more details.
//
//You should have received a copy of the GNU Affero General Public License
//along with Talismane.  If not, see <http://www.gnu.org/licenses/>.
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.talismane;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.Map.Entry;
import java.util.Scanner;
import java.util.zip.ZipInputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.log4j.PropertyConfigurator;

import com.joliciel.talismane.Talismane.Command;
import com.joliciel.talismane.Talismane.Mode;
import com.joliciel.talismane.Talismane.Module;
import com.joliciel.talismane.Talismane.Option;
import com.joliciel.talismane.filters.FilterService;
import com.joliciel.talismane.filters.MarkerFilterType;
import com.joliciel.talismane.filters.TextMarkerFilter;
import com.joliciel.talismane.languageDetector.LanguageDetector;
import com.joliciel.talismane.languageDetector.LanguageDetectorAnnotatedCorpusReader;
import com.joliciel.talismane.languageDetector.LanguageDetectorFeature;
import com.joliciel.talismane.languageDetector.LanguageDetectorProcessor;
import com.joliciel.talismane.languageDetector.LanguageDetectorService;
import com.joliciel.talismane.languageDetector.LanguageOutcome;
import com.joliciel.talismane.lexicon.LexicalEntryReader;
import com.joliciel.talismane.lexicon.LexiconDeserializer;
import com.joliciel.talismane.lexicon.PosTaggerLexicon;
import com.joliciel.talismane.lexicon.RegexLexicalEntryReader;
import com.joliciel.talismane.machineLearning.ClassificationEventStream;
import com.joliciel.talismane.machineLearning.ClassificationObserver;
import com.joliciel.talismane.machineLearning.ClassificationModel;
import com.joliciel.talismane.machineLearning.ExternalResource;
import com.joliciel.talismane.machineLearning.ExternalResourceFinder;
import com.joliciel.talismane.machineLearning.ExternalWordList;
import com.joliciel.talismane.machineLearning.MachineLearningAlgorithm;
import com.joliciel.talismane.machineLearning.MachineLearningModel;
import com.joliciel.talismane.machineLearning.MachineLearningService;
import com.joliciel.talismane.machineLearning.MachineLearningSession;
import com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModelTrainer;
import com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModelTrainer.LinearSVMSolverType;
import com.joliciel.talismane.machineLearning.maxent.MaxentModelTrainer;
import com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModelTrainer;
import com.joliciel.talismane.machineLearning.perceptron.PerceptronService.PerceptronScoring;
import com.joliciel.talismane.output.FreemarkerTemplateWriter;
import com.joliciel.talismane.parser.ParseComparator;
import com.joliciel.talismane.parser.ParseComparisonStrategy;
import com.joliciel.talismane.parser.ParseConfigurationProcessor;
import com.joliciel.talismane.parser.ParseEvaluationFScoreCalculator;
import com.joliciel.talismane.parser.ParseEvaluationGuessTemplateWriter;
import com.joliciel.talismane.parser.ParseEvaluationObserverImpl;
import com.joliciel.talismane.parser.ParseEvaluationSentenceWriter;
import com.joliciel.talismane.parser.Parser;
import com.joliciel.talismane.parser.Parser.ParseComparisonStrategyType;
import com.joliciel.talismane.parser.ParseConfigurationProcessorChain;
import com.joliciel.talismane.parser.ParserAnnotatedCorpusReader;
import com.joliciel.talismane.parser.ParserEvaluator;
import com.joliciel.talismane.parser.ParserFScoreCalculatorByDistance;
import com.joliciel.talismane.parser.ParserRegexBasedCorpusReader;
import com.joliciel.talismane.parser.ParserService;
import com.joliciel.talismane.parser.ParsingConstrainer;
import com.joliciel.talismane.parser.Transition;
import com.joliciel.talismane.parser.TransitionBasedParser;
import com.joliciel.talismane.parser.TransitionSystem;
import com.joliciel.talismane.parser.features.ParseConfigurationFeature;
import com.joliciel.talismane.parser.features.ParserFeatureService;
import com.joliciel.talismane.parser.features.ParserRule;
import com.joliciel.talismane.posTagger.NonDeterministicPosTagger;
import com.joliciel.talismane.posTagger.PosTag;
import com.joliciel.talismane.posTagger.PosTagAnnotatedCorpusReader;
import com.joliciel.talismane.posTagger.PosTagComparator;
import com.joliciel.talismane.posTagger.PosTagEvaluationFScoreCalculator;
import com.joliciel.talismane.posTagger.PosTagEvaluationLexicalCoverageTester;
import com.joliciel.talismane.posTagger.PosTagEvaluationSentenceWriter;
import com.joliciel.talismane.posTagger.PosTagRegexBasedCorpusReader;
import com.joliciel.talismane.posTagger.PosTagSequenceProcessor;
import com.joliciel.talismane.posTagger.PosTagSet;
import com.joliciel.talismane.posTagger.PosTagger;
import com.joliciel.talismane.posTagger.PosTaggerEvaluator;
import com.joliciel.talismane.posTagger.PosTaggerGuessTemplateWriter;
import com.joliciel.talismane.posTagger.PosTaggerService;
import com.joliciel.talismane.posTagger.features.PosTaggerFeature;
import com.joliciel.talismane.posTagger.features.PosTaggerFeatureService;
import com.joliciel.talismane.posTagger.features.PosTaggerRule;
import com.joliciel.talismane.posTagger.filters.PosTagFilterService;
import com.joliciel.talismane.posTagger.filters.PosTagSequenceFilter;
import com.joliciel.talismane.sentenceDetector.SentenceDetector;
import com.joliciel.talismane.sentenceDetector.SentenceDetectorAnnotatedCorpusReader;
import com.joliciel.talismane.sentenceDetector.SentenceDetectorEvaluator;
import com.joliciel.talismane.sentenceDetector.SentenceDetectorOutcome;
import com.joliciel.talismane.sentenceDetector.SentenceDetectorService;
import com.joliciel.talismane.sentenceDetector.SentenceProcessor;
import com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature;
import com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeatureService;
import com.joliciel.talismane.tokeniser.TokenComparator;
import com.joliciel.talismane.tokeniser.TokenEvaluationCorpusWriter;
import com.joliciel.talismane.tokeniser.TokenEvaluationFScoreCalculator;
import com.joliciel.talismane.tokeniser.TokenEvaluationObserver;
import com.joliciel.talismane.tokeniser.TokenRegexBasedCorpusReader;
import com.joliciel.talismane.tokeniser.TokenSequenceProcessor;
import com.joliciel.talismane.tokeniser.Tokeniser;
import com.joliciel.talismane.tokeniser.Tokeniser.TokeniserType;
import com.joliciel.talismane.tokeniser.TokeniserAnnotatedCorpusReader;
import com.joliciel.talismane.tokeniser.TokeniserEvaluator;
import com.joliciel.talismane.tokeniser.TokeniserGuessTemplateWriter;
import com.joliciel.talismane.tokeniser.TokeniserOutcome;
import com.joliciel.talismane.tokeniser.TokeniserService;
import com.joliciel.talismane.tokeniser.features.TokenFeatureService;
import com.joliciel.talismane.tokeniser.features.TokenPatternMatchFeature;
import com.joliciel.talismane.tokeniser.features.TokeniserContextFeature;
import com.joliciel.talismane.tokeniser.filters.TokenFilter;
import com.joliciel.talismane.tokeniser.filters.TokenFilterService;
import com.joliciel.talismane.tokeniser.filters.TokenSequenceFilter;
import com.joliciel.talismane.tokeniser.patterns.PatternTokeniser;
import com.joliciel.talismane.tokeniser.patterns.TokeniserPatternManager;
import com.joliciel.talismane.tokeniser.patterns.TokeniserPatternService;
import com.joliciel.talismane.tokeniser.patterns.TokeniserPatternService.PatternTokeniserType;
import com.joliciel.talismane.utils.ArrayListNoNulls;
import com.joliciel.talismane.utils.LogUtils;
import com.joliciel.talismane.utils.io.CurrentFileProvider;
import com.joliciel.talismane.utils.io.DirectoryReader;
import com.joliciel.talismane.utils.io.DirectoryWriter;

class TalismaneConfigImpl implements TalismaneConfig {
    // Class-wide logger.
    private static final Log LOG = LogFactory.getLog(TalismaneConfigImpl.class);
    // What to run: assigned from the "command", "option" and "mode" arguments
    // in loadParameters ("analyze" is accepted as a spelling of "analyse").
    private Command command = null;
    private Option option = null;
    private Mode mode = Mode.normal;

    // Module selection: assigned from the "startModule", "endModule" and
    // "module" arguments in loadParameters.
    private Module startModule = null;
    private Module endModule = null;
    private Module module = null;

    // Analysis component fields; none is initialised at declaration.
    private LanguageDetector languageDetector;
    private SentenceDetector sentenceDetector;
    private Tokeniser tokeniser;
    private PosTagger posTagger;
    private Parser parser;

    // Evaluator and comparator fields; none is initialised at declaration.
    private ParserEvaluator parserEvaluator;
    private PosTaggerEvaluator posTaggerEvaluator;
    private TokeniserEvaluator tokeniserEvaluator;
    private SentenceDetectorEvaluator sentenceDetectorEvaluator;
    private ParseComparator parseComparator;
    private PosTagComparator posTagComparator;
    private TokenComparator tokenComparator;

    // Annotated corpus reader fields (separate fields exist for the
    // evaluation variants); all start as null.
    private TokeniserAnnotatedCorpusReader tokenCorpusReader = null;
    private PosTagAnnotatedCorpusReader posTagCorpusReader = null;
    private ParserAnnotatedCorpusReader parserCorpusReader = null;
    private ParserAnnotatedCorpusReader parserEvaluationCorpusReader = null;
    private PosTagAnnotatedCorpusReader posTagEvaluationCorpusReader = null;
    private TokeniserAnnotatedCorpusReader tokenEvaluationCorpusReader = null;
    private SentenceDetectorAnnotatedCorpusReader sentenceCorpusReader = null;
    private LanguageDetectorAnnotatedCorpusReader languageCorpusReader = null;

    // Processor fields, one per module type; none is initialised at declaration.
    private LanguageDetectorProcessor languageDetectorProcessor;
    private SentenceProcessor sentenceProcessor;
    private TokenSequenceProcessor tokenSequenceProcessor;
    private PosTagSequenceProcessor posTagSequenceProcessor;
    private ParseConfigurationProcessor parseConfigurationProcessor;

    // Model fields; the parser model is declared as MachineLearningModel
    // rather than ClassificationModel.
    private ClassificationModel<TokeniserOutcome> tokeniserModel = null;
    private ClassificationModel<PosTag> posTaggerModel = null;
    private MachineLearningModel parserModel = null;

    // General processing settings with their defaults. maxSentenceCount is
    // assigned from the "sentenceCount" argument (the names differ).
    private boolean processByDefault = true;
    private int maxSentenceCount = 0;
    private int startSentence = 0;
    private int beamWidth = 1;
    private boolean propagateBeam = true;
    private boolean includeDetails = false;
    // Charsets are null at declaration; loadParameters collects "encoding",
    // "inputEncoding" and "outputEncoding" and rejects mixing the first with
    // either of the other two.
    private Charset inputCharset = null;
    private Charset outputCharset = null;

    // Per-module beam settings. NOTE(review): the -1 defaults presumably mean
    // "not set, fall back to beamWidth" -- confirm against the code that
    // reads them.
    private int tokeniserBeamWidth = 1;
    private int posTaggerBeamWidth = -1;
    private int parserBeamWidth = -1;
    private boolean propagateTokeniserBeam = false;

    // Defaults to the form-feed character; overridden by the
    // "endBlockCharCode" argument, which is parsed as an integer and cast to
    // char.
    private char endBlockCharacter = '\f';
    // Assigned from the "inputPattern" and "inputPatternFile" arguments.
    private String inputRegex;
    private String inputPatternFilePath = null;
    // Assigned from the "evaluationPattern" and "evaluationPatternFile"
    // arguments.
    private String evaluationRegex;
    private String evaluationPatternFilePath = null;
    // NOTE(review): units are not stated here -- presumably seconds; confirm.
    private int maxParseAnalysisTime = 60;
    // NOTE(review): units are not stated here -- presumably megabytes; confirm.
    private int minFreeMemory = 64;
    private boolean earlyStop = false;

    // Reader/writer fields; all null at declaration.
    private Reader reader = null;
    private Writer writer = null;
    private Reader evaluationReader = null;

    // File and directory paths; all default to null. Where loadParameters
    // assigns them, the argument name usually differs from the field name
    // (e.g. "inFile" -> inFilePath, "parserModel" -> parserModelFilePath,
    // "template" -> templatePath).
    private String inFilePath = null;
    private String inDirPath = null;
    private String outFilePath = null;
    private String outDirPath = null;
    private String parserModelFilePath = null;
    private String posTaggerModelFilePath = null;
    private String tokeniserModelFilePath = null;
    private String sentenceModelFilePath = null;
    private String languageModelFilePath = null;
    private String textFiltersPath = null;
    private String tokenFiltersPath = null;
    private String tokenSequenceFilterPath = null;
    private String posTagSequenceFilterPath = null;
    private String templatePath = null;
    private String evaluationFilePath = null;
    private String sentenceReaderPath = null;
    private String posTaggerRuleFilePath = null;
    private String posTaggerFeaturePath = null;
    private String tokeniserFeaturePath = null;
    private String tokeniserPatternFilePath = null;
    private String sentenceFeaturePath = null;
    private String languageFeaturePath = null;
    private String languageCorpusMapPath = null;

    // A "lexicon" argument value starting with "replace:" sets replaceLexicon
    // to true, and the prefix is stripped before the path is stored.
    private String lexiconPath = null;
    private boolean replaceLexicon = false;

    // Default template names, one per module.
    private String sentenceTemplateName = "sentence_template.ftl";
    private String tokeniserTemplateName = "tokeniser_template.ftl";
    private String posTaggerTemplateName = "posTagger_template.ftl";
    private String parserTemplateName = "parser_conll_template.ftl";

    // Output and evaluation options with their defaults.
    private String fileName = null;
    private boolean logStats = false;
    private File outDir = null;
    private String baseName = null;
    private String suffix = "";
    private boolean outputGuesses = false;
    private int outputGuessCount = 0;
    private boolean labeledEvaluation = true;
    private boolean dynamiseFeatures = false;
    private String skipLabel = null;
    // Built in loadParameters by splitting the "errorLabels" argument on commas.
    private Set<String> errorLabels = null;

    // Rules and filters. Only the additional/prepended token filter lists are
    // created at declaration; the others start as null.
    private List<PosTaggerRule> posTaggerRules = null;
    private List<ParserRule> parserRules = null;
    private String parserRuleFilePath = null;
    private String parserFeaturePath = null;
    private List<TextMarkerFilter> textMarkerFilters = null;
    private List<TokenFilter> tokenFilters = null;
    private List<TokenFilter> additionalTokenFilters = new ArrayListNoNulls<TokenFilter>();
    private List<TokenFilter> prependedTokenFilters = new ArrayListNoNulls<TokenFilter>();
    private List<TokenSequenceFilter> tokenSequenceFilters = null;
    private List<PosTagSequenceFilter> posTaggerPostProcessingFilters = null;
    private boolean includeDistanceFScores = false;
    private boolean includeTransitionLog = false;
    private boolean predictTransitions = false;
    // Each *Replace flag is set to true in loadParameters when the matching
    // argument value ("posTaggerRules", "parserRules", "tokenFilters",
    // "textFilters", "tokenSequenceFilters") starts with "replace:".
    private boolean posTaggerRulesReplace = false;
    private boolean parserRulesReplace = false;
    private boolean tokenFiltersReplace = false;
    private boolean textFiltersReplace = false;
    private boolean tokenSequenceFiltersReplace = false;

    // newlineMarker is assigned from the "newline" argument via
    // MarkerFilterType.valueOf; blockSize from the "blockSize" argument.
    private MarkerFilterType newlineMarker = MarkerFilterType.SENTENCE_BREAK;
    private int blockSize = 1000;

    // Cross-validation settings. NOTE(review): -1 presumably means
    // "not configured" -- confirm against the code that reads them.
    private int crossValidationSize = -1;
    private int includeIndex = -1;
    private int excludeIndex = -1;

    // Feature sets and tokeniser configuration; the feature sets are not
    // initialised at declaration.
    private Set<String> testWords = null;
    private Set<LanguageDetectorFeature<?>> languageFeatures;
    private Set<SentenceDetectorFeature<?>> sentenceFeatures;
    private Set<TokeniserContextFeature<?>> tokeniserContextFeatures;
    private Set<TokenPatternMatchFeature<?>> tokenPatternMatchFeatures;
    private Set<PosTaggerFeature<?>> posTaggerFeatures;
    private Set<ParseConfigurationFeature<?>> parserFeatures;
    private TokeniserPatternManager tokeniserPatternManager;
    private ClassificationEventStream classificationEventStream;
    private TokeniserType tokeniserType = TokeniserType.pattern;
    private PatternTokeniserType patternTokeniserType = PatternTokeniserType.Compound;

    // Bookkeeping flags for the corpus readers; all start as false.
    private boolean parserCorpusReaderFiltersAdded = false;
    private boolean posTagCorpusReaderFiltersAdded = false;
    private boolean tokenCorpusReaderFiltersAdded = false;
    private boolean parserCorpusReaderDecorated = false;

    // Service collaborators. NOTE(review): presumably injected through
    // setters outside this view -- confirm.
    private TalismaneServiceInternal talismaneService;
    private PosTaggerService posTaggerService;
    private ParserService parserService;
    private PosTaggerFeatureService posTaggerFeatureService;
    private ParserFeatureService parserFeatureService;
    private FilterService filterService;
    private TokenFilterService tokenFilterService;
    private SentenceDetectorService sentenceDetectorService;
    private SentenceDetectorFeatureService sentenceDetectorFeatureService;
    private MachineLearningService machineLearningService;
    private TokeniserPatternService tokeniserPatternService;
    private TokenFeatureService tokenFeatureService;
    private TokeniserService tokeniserService;
    private PosTagFilterService posTagFilterService;
    private LanguageDetectorService languageDetectorService;

    // performanceConfigFile is resolved in loadParameters from the
    // "performanceConfigFile" argument via getFile.
    private File performanceConfigFile;
    // Assigned from the "parseComparisonStrategy" argument.
    private ParseComparisonStrategyType parseComparisonStrategyType;
    private boolean includeLexiconCoverage = false;
    private boolean includeUnknownWordResults = false;

    // server parameters
    private int port = 7272;

    // training parameters
    // Note: the fields from here on are declared without an access modifier,
    // so they are package-private rather than private.
    int iterations = 0;
    int cutoff = 0;
    MachineLearningAlgorithm algorithm = MachineLearningAlgorithm.MaxEnt;
    // NOTE(review): the -1 defaults below presumably mean "not specified" --
    // confirm against the trainer set-up code.
    double constraintViolationCost = -1;
    double epsilon = -1;
    LinearSVMSolverType solverType = null;
    double perceptronTolerance = -1;
    boolean averageAtIntervals = false;
    List<Integer> perceptronObservationPoints = null;
    String dependencyLabelPath = null;
    String excludeFileName = null;

    // Resource, constraint and session state; null at declaration except for
    // implementation, which both constructors assign.
    ExternalResourceFinder externalResourceFinder = null;
    Map<String, List<String>> descriptors = null;
    String parsingConstrainerPath = null;
    ParsingConstrainer parsingConstrainer = null;
    LanguageImplementation implementation;
    TalismaneSession talismaneSession = null;

    File baseDir = null;

    // Defaults to true.
    boolean preloadLexicon = true;

    Locale locale = null;

    String corpusLexicalEntryRegexPath = null;

    /**
     * Creates a configuration bound to an explicit language implementation.
     *
     * @param implementation the language implementation to use; it is stored
     *        as given, without a null check
     */
    public TalismaneConfigImpl(LanguageImplementation implementation) {
        this.implementation = implementation;
    }

    /**
     * Creates a configuration that is not tied to a specific language
     * implementation: a generic implementation is obtained for the given
     * session instead. The caller must then supply the languagePack
     * parameter, or else specify every resource individually.
     *
     * @param sessionId identifier handed to
     *        {@code GenericLanguageImplementation.getInstance}
     */
    public TalismaneConfigImpl(String sessionId) {
        LanguageImplementation genericImplementation = GenericLanguageImplementation.getInstance(sessionId);
        this.implementation = genericImplementation;
    }

    public void loadParameters(Map<String, String> args) {
        try {
            if (args.size() == 0) {
                System.out.println("Talismane usage instructions: ");
                System.out.println("* indicates optional, + indicates default value");
                System.out.println("");
                System.out.println(
                        "Usage: command=analyse *startModule=[sentence+|tokenise|postag|parse] *endModule=[sentence|tokenise|postag|parse+] *inFile=[inFilePath, stdin if missing] *outFile=[outFilePath, stdout if missing] *template=[outputTemplatePath]");
                System.out.println("");
                System.out.println("Additional optional parameters:");
                System.out.println(
                        " *encoding=[UTF-8, ...] *includeDetails=[true|false+] posTaggerRules*=[posTaggerRuleFilePath] textFilters*=[regexFilterFilePath] *sentenceModel=[path] *tokeniserModel=[path] *posTaggerModel=[path] *parserModel=[path] *inputPatternFile=[inputPatternFilePath] *posTagSet=[posTagSetPath]");
                return;
            }

            String logConfigPath = args.get("logConfigFile");
            if (logConfigPath != null) {
                args.remove("logConfigFile");
                Properties props = new Properties();
                props.load(new FileInputStream(logConfigPath));
                PropertyConfigurator.configure(props);
            }

            String performanceConifPath = args.get("performanceConfigFile");
            if (performanceConifPath != null) {
                args.remove("performanceConfigFile");
                performanceConfigFile = this.getFile(performanceConifPath);
            }

            String encoding = null;
            String inputEncoding = null;
            String outputEncoding = null;
            String builtInTemplate = null;

            String posTagSetPath = null;
            String externalResourcePath = null;
            String transitionSystemStr = null;

            String languagePackPath = null;

            for (Entry<String, String> arg : args.entrySet()) {
                String argName = arg.getKey();
                String argValue = arg.getValue();
                if (argName.equals("command")) {
                    String commandString = argValue;
                    if (commandString.equals("analyze"))
                        commandString = "analyse";

                    command = Command.valueOf(commandString);
                } else if (argName.equals("option")) {
                    option = Option.valueOf(argValue);
                } else if (argName.equals("mode")) {
                    mode = Mode.valueOf(argValue);
                } else if (argName.equals("module")) {
                    if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                        module = Talismane.Module.SentenceDetector;
                    else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                        module = Talismane.Module.Tokeniser;
                    else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                        module = Talismane.Module.PosTagger;
                    else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                        module = Talismane.Module.Parser;
                    else if (argValue.equalsIgnoreCase("language") || argValue.equalsIgnoreCase("languageDetector"))
                        module = Talismane.Module.LanguageDetector;
                    else
                        throw new TalismaneException("Unknown module: " + argValue);
                } else if (argName.equals("startModule")) {
                    if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                        startModule = Talismane.Module.SentenceDetector;
                    else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                        startModule = Talismane.Module.Tokeniser;
                    else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                        startModule = Talismane.Module.PosTagger;
                    else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                        startModule = Talismane.Module.Parser;
                    else
                        throw new TalismaneException("Unknown startModule: " + argValue);
                } else if (argName.equals("endModule")) {
                    if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                        endModule = Talismane.Module.SentenceDetector;
                    else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                        endModule = Talismane.Module.Tokeniser;
                    else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                        endModule = Talismane.Module.PosTagger;
                    else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                        endModule = Talismane.Module.Parser;
                    else
                        throw new TalismaneException("Unknown endModule: " + argValue);
                } else if (argName.equals("inFile"))
                    inFilePath = argValue;
                else if (argName.equals("inDir"))
                    inDirPath = argValue;
                else if (argName.equals("outFile"))
                    outFilePath = argValue;
                else if (argName.equals("outDir"))
                    outDirPath = argValue;
                else if (argName.equals("template"))
                    templatePath = argValue;
                else if (argName.equals("builtInTemplate"))
                    builtInTemplate = argValue;
                else if (argName.equals("encoding")) {
                    if (inputEncoding != null || outputEncoding != null)
                        throw new TalismaneException(
                                "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                    encoding = argValue;
                } else if (argName.equals("inputEncoding")) {
                    if (encoding != null)
                        throw new TalismaneException(
                                "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                    inputEncoding = argValue;
                } else if (argName.equals("outputEncoding")) {
                    if (encoding != null)
                        throw new TalismaneException(
                                "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                    outputEncoding = argValue;
                } else if (argName.equals("includeDetails"))
                    includeDetails = argValue.equalsIgnoreCase("true");
                else if (argName.equals("propagateBeam"))
                    propagateBeam = argValue.equalsIgnoreCase("true");
                else if (argName.equals("beamWidth"))
                    beamWidth = Integer.parseInt(argValue);
                else if (argName.equals("languageModel"))
                    languageModelFilePath = argValue;
                else if (argName.equals("sentenceModel"))
                    sentenceModelFilePath = argValue;
                else if (argName.equals("tokeniserModel"))
                    tokeniserModelFilePath = argValue;
                else if (argName.equals("posTaggerModel"))
                    posTaggerModelFilePath = argValue;
                else if (argName.equals("parserModel"))
                    parserModelFilePath = argValue;
                else if (argName.equals("inputPatternFile"))
                    inputPatternFilePath = argValue;
                else if (argName.equals("inputPattern"))
                    inputRegex = argValue;
                else if (argName.equals("evaluationPatternFile"))
                    evaluationPatternFilePath = argValue;
                else if (argName.equals("evaluationPattern"))
                    evaluationRegex = argValue;
                else if (argName.equals("posTaggerRules")) {
                    if (argValue.startsWith("replace:")) {
                        posTaggerRulesReplace = true;
                        posTaggerRuleFilePath = argValue.substring("replace:".length());
                    } else {
                        posTaggerRuleFilePath = argValue;
                    }
                } else if (argName.equals("parserRules")) {
                    if (argValue.startsWith("replace:")) {
                        parserRulesReplace = true;
                        parserRuleFilePath = argValue.substring("replace:".length());
                    } else {
                        parserRuleFilePath = argValue;
                    }
                } else if (argName.equals("posTagSet"))
                    posTagSetPath = argValue;
                else if (argName.equals("textFilters")) {
                    if (argValue.startsWith("replace:")) {
                        textFiltersReplace = true;
                        textFiltersPath = argValue.substring("replace:".length());
                    } else {
                        textFiltersPath = argValue;
                    }
                } else if (argName.equals("tokenFilters")) {
                    if (argValue.startsWith("replace:")) {
                        tokenFiltersReplace = true;
                        tokenFiltersPath = argValue.substring("replace:".length());
                    } else {
                        tokenFiltersPath = argValue;
                    }
                } else if (argName.equals("tokenSequenceFilters")) {
                    if (argValue.startsWith("replace:")) {
                        tokenSequenceFiltersReplace = true;
                        tokenSequenceFilterPath = argValue.substring("replace:".length());
                    } else {
                        tokenSequenceFilterPath = argValue;
                    }
                } else if (argName.equals("posTagSequenceFilters"))
                    posTagSequenceFilterPath = argValue;
                else if (argName.equals("logStats"))
                    logStats = argValue.equalsIgnoreCase("true");
                else if (argName.equals("newline"))
                    newlineMarker = MarkerFilterType.valueOf(argValue);
                else if (argName.equals("fileName"))
                    fileName = argValue;
                else if (argName.equals("processByDefault"))
                    processByDefault = argValue.equalsIgnoreCase("true");
                else if (argName.equals("maxParseAnalysisTime"))
                    maxParseAnalysisTime = Integer.parseInt(argValue);
                else if (argName.equals("minFreeMemory"))
                    minFreeMemory = Integer.parseInt(argValue);
                else if (argName.equals("transitionSystem"))
                    transitionSystemStr = argValue;
                else if (argName.equals("sentenceCount"))
                    maxSentenceCount = Integer.parseInt(argValue);
                else if (argName.equals("startSentence"))
                    startSentence = Integer.parseInt(argValue);
                else if (argName.equals("endBlockCharCode"))
                    endBlockCharacter = (char) Integer.parseInt(argValue);
                else if (argName.equals("outputGuesses"))
                    outputGuesses = argValue.equalsIgnoreCase("true");
                else if (argName.equals("outputGuessCount"))
                    outputGuessCount = Integer.parseInt(argValue);
                else if (argName.equals("suffix"))
                    suffix = argValue;
                else if (argName.equals("includeDistanceFScores"))
                    includeDistanceFScores = argValue.equalsIgnoreCase("true");
                else if (argName.equals("includeTransitionLog"))
                    includeTransitionLog = argValue.equalsIgnoreCase("true");
                else if (argName.equals("evaluationFile"))
                    evaluationFilePath = argValue;
                else if (argName.equals("labeledEvaluation"))
                    labeledEvaluation = argValue.equalsIgnoreCase("true");
                else if (argName.equals("tokeniserBeamWidth"))
                    tokeniserBeamWidth = Integer.parseInt(argValue);
                else if (argName.equals("posTaggerBeamWidth"))
                    posTaggerBeamWidth = Integer.parseInt(argValue);
                else if (argName.equals("parserBeamWidth"))
                    parserBeamWidth = Integer.parseInt(argValue);
                else if (argName.equals("propagateTokeniserBeam"))
                    propagateTokeniserBeam = argValue.equalsIgnoreCase("true");
                else if (argName.equals("blockSize"))
                    blockSize = Integer.parseInt(argValue);
                else if (argName.equals("crossValidationSize"))
                    crossValidationSize = Integer.parseInt(argValue);
                else if (argName.equals("includeIndex"))
                    includeIndex = Integer.parseInt(argValue);
                else if (argName.equals("excludeIndex"))
                    excludeIndex = Integer.parseInt(argValue);
                else if (argName.equals("dynamiseFeatures"))
                    dynamiseFeatures = argValue.equalsIgnoreCase("true");
                else if (argName.equals("predictTransitions"))
                    predictTransitions = argValue.equalsIgnoreCase("true");
                else if (argName.equals("lexicon")) {
                    if (argValue.startsWith("replace:")) {
                        replaceLexicon = true;
                        lexiconPath = argValue.substring("replace:".length());
                    } else {
                        lexiconPath = argValue;
                    }
                } else if (argName.equals("perceptronScoring")) {
                    PerceptronScoring perceptronScoring = PerceptronScoring.valueOf(argValue);
                    MachineLearningSession.setPerceptronScoring(perceptronScoring);
                } else if (argName.equals("parseComparisonStrategy")) {
                    parseComparisonStrategyType = ParseComparisonStrategyType.valueOf(argValue);
                } else if (argName.equals("sentenceReader")) {
                    sentenceReaderPath = argValue;
                } else if (argName.equals("skipLabel")) {
                    skipLabel = argValue;
                } else if (argName.equals("errorLabels")) {
                    errorLabels = new HashSet<String>();
                    String[] labels = argValue.split(",");
                    for (String label : labels) {
                        errorLabels.add(label);
                    }
                } else if (argName.equals("earlyStop")) {
                    earlyStop = argValue.equalsIgnoreCase("true");
                } else if (argName.equals("languageFeatures")) {
                    languageFeaturePath = argValue;
                } else if (argName.equals("sentenceFeatures")) {
                    sentenceFeaturePath = argValue;
                } else if (argName.equals("tokeniserFeatures")) {
                    tokeniserFeaturePath = argValue;
                } else if (argName.equals("tokeniserPatterns")) {
                    tokeniserPatternFilePath = argValue;
                } else if (argName.equals("posTaggerFeatures")) {
                    posTaggerFeaturePath = argValue;
                } else if (argName.equals("parserFeatures")) {
                    parserFeaturePath = argValue;
                } else if (argName.equals("externalResources")) {
                    externalResourcePath = argValue;
                } else if (argName.equals("testWords")) {
                    String[] parts = argValue.split(";");
                    testWords = new HashSet<String>();
                    for (String part : parts)
                        testWords.add(part);
                } else if (argName.equals("includeLexiconCoverage")) {
                    includeLexiconCoverage = argValue.equalsIgnoreCase("true");
                } else if (argName.equals("includeUnknownWordResults")) {
                    includeUnknownWordResults = argValue.equalsIgnoreCase("true");
                } else if (argName.equals("iterations"))
                    iterations = Integer.parseInt(argValue);
                else if (argName.equals("cutoff"))
                    cutoff = Integer.parseInt(argValue);
                else if (argName.equals("dependencyLabels"))
                    dependencyLabelPath = argValue;
                else if (argName.equals("parsingConstrainer"))
                    parsingConstrainerPath = argValue;
                else if (argName.equals("algorithm"))
                    algorithm = MachineLearningAlgorithm.valueOf(argValue);
                else if (argName.equals("linearSVMSolver"))
                    solverType = LinearSVMSolverType.valueOf(argValue);
                else if (argName.equals("linearSVMCost"))
                    constraintViolationCost = Double.parseDouble(argValue);
                else if (argName.equals("linearSVMEpsilon"))
                    epsilon = Double.parseDouble(argValue);
                else if (argName.equals("perceptronTolerance"))
                    perceptronTolerance = Double.parseDouble(argValue);
                else if (argName.equals("averageAtIntervals"))
                    averageAtIntervals = argValue.equalsIgnoreCase("true");
                else if (argName.equals("perceptronObservationPoints")) {
                    String[] points = argValue.split(",");
                    perceptronObservationPoints = new ArrayListNoNulls<Integer>();
                    for (String point : points)
                        perceptronObservationPoints.add(Integer.parseInt(point));
                } else if (argName.equals("tokeniserType")) {
                    tokeniserType = TokeniserType.valueOf(argValue);
                } else if (argName.equals("patternTokeniser"))
                    patternTokeniserType = PatternTokeniserType.valueOf(argValue);
                else if (argName.equals("excludeFile")) {
                    excludeFileName = argValue;
                } else if (argName.equals("port")) {
                    port = Integer.parseInt(argValue);
                } else if (argName.equals("preloadLexicon")) {
                    preloadLexicon = argValue.equalsIgnoreCase("true");
                } else if (argName.equals("locale")) {
                    locale = Locale.forLanguageTag(argValue);
                } else if (argName.equals("languageCorpusMap")) {
                    languageCorpusMapPath = argValue;
                } else if (argName.equals("corpusLexicalEntryRegex")) {
                    corpusLexicalEntryRegexPath = argValue;
                } else if (argName.equals("languagePack")) {
                    languagePackPath = argValue;
                } else {
                    System.out.println("Unknown argument: " + argName);
                    throw new RuntimeException("Unknown argument: " + argName);
                }
            }

            if (command == null)
                throw new TalismaneException("No command provided.");

            if (!(implementation instanceof LanguagePackImplementation) && languagePackPath != null)
                throw new TalismaneException("The implementation " + implementation.getClass().getSimpleName()
                        + " does not accept language packs");

            if (implementation instanceof LanguagePackImplementation) {
                if (languagePackPath != null) {
                    File languagePackFile = this.getFile(languagePackPath);
                    if (!languagePackFile.exists())
                        throw new TalismaneException(
                                "languagePack: could not find file: " + languagePackFile.getPath());

                    LOG.debug("Setting language pack to " + languagePackFile.getPath());
                    ((LanguagePackImplementation) implementation).setLanguagePack(languagePackFile);
                }
            }

            if (command.equals(Command.evaluate)) {
                if (outDirPath.length() == 0)
                    throw new TalismaneException("Missing argument: outdir");
            }

            if (startModule == null)
                startModule = module;
            if (startModule == null)
                startModule = Module.SentenceDetector;
            if (endModule == null)
                endModule = module;
            if (endModule == null)
                endModule = Module.Parser;
            if (module == null)
                module = endModule;

            if (command == Command.train) {
                if (module == Module.LanguageDetector) {
                    if (languageModelFilePath == null)
                        throw new TalismaneException(
                                "languageModel is required when training a language detector model");
                    if (languageCorpusMapPath == null)
                        throw new TalismaneException(
                                "languageCorpusMap is required when training a language detector model");
                    if (languageFeaturePath == null)
                        throw new TalismaneException(
                                "languageFeatures is required when training a language detector model");
                } else if (module == Module.SentenceDetector) {
                    if (sentenceModelFilePath == null)
                        throw new TalismaneException(
                                "sentenceModel is required when training a sentence detector model");
                    if (sentenceFeaturePath == null)
                        throw new TalismaneException(
                                "sentenceFeatures is required when training a sentence detector model");
                } else if (module == Module.Tokeniser) {
                    if (tokeniserModelFilePath == null)
                        throw new TalismaneException("tokeniserModel is required when training a tokeniser model");
                    if (tokeniserFeaturePath == null)
                        throw new TalismaneException(
                                "tokeniserFeatures is required when training a tokeniser model");
                } else if (module == Module.PosTagger) {
                    if (posTaggerModelFilePath == null)
                        throw new TalismaneException("posTaggerModel is required when training a posTagger model");
                    if (posTaggerFeaturePath == null)
                        throw new TalismaneException(
                                "posTaggerFeatures is required when training a posTagger model");
                } else if (module == Module.Parser) {
                    this.predictTransitions = true;

                    if (parserModelFilePath == null)
                        throw new TalismaneException("parserModel is required when training a parser model");
                    if (parserFeaturePath == null)
                        throw new TalismaneException("parserFeatures is required when training a parser model");
                }
            }

            if (builtInTemplate != null) {
                if (builtInTemplate.equalsIgnoreCase("with_location")) {
                    tokeniserTemplateName = "tokeniser_template_with_location.ftl";
                    posTaggerTemplateName = "posTagger_template_with_location.ftl";
                    parserTemplateName = "parser_conll_template_with_location.ftl";
                } else if (builtInTemplate.equalsIgnoreCase("with_prob")) {
                    tokeniserTemplateName = "tokeniser_template_with_prob.ftl";
                    posTaggerTemplateName = "posTagger_template_with_prob.ftl";
                    parserTemplateName = "parser_conll_template_with_prob.ftl";
                } else if (builtInTemplate.equalsIgnoreCase("with_comments")) {
                    posTaggerTemplateName = "posTagger_template_with_comments.ftl";
                    parserTemplateName = "parser_conll_template_with_comments.ftl";
                } else {
                    throw new TalismaneException("Unknown builtInTemplate: " + builtInTemplate);
                }
            }

            if (posTaggerBeamWidth < 0)
                posTaggerBeamWidth = beamWidth;
            if (parserBeamWidth < 0)
                parserBeamWidth = beamWidth;

            inputCharset = Charset.defaultCharset();
            outputCharset = Charset.defaultCharset();
            if (encoding != null) {
                inputCharset = Charset.forName(encoding);
                outputCharset = Charset.forName(encoding);
            } else {
                if (inputEncoding != null)
                    inputCharset = Charset.forName(inputEncoding);
                if (outputEncoding != null)
                    outputCharset = Charset.forName(outputEncoding);
            }

            if (fileName == null && inFilePath != null) {
                fileName = inFilePath;
            }

            if (posTagSetPath != null) {
                File posTagSetFile = this.getFile(posTagSetPath);
                Scanner posTagSetScanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(posTagSetFile), this.getInputCharset().name())));

                PosTagSet posTagSet = this.getPosTaggerService().getPosTagSet(posTagSetScanner);
                talismaneSession.setPosTagSet(posTagSet);
            }

            if (transitionSystemStr != null) {
                TransitionSystem transitionSystem = null;
                if (transitionSystemStr.equalsIgnoreCase("ShiftReduce")) {
                    transitionSystem = this.getParserService().getShiftReduceTransitionSystem();
                } else if (transitionSystemStr.equalsIgnoreCase("ArcEager")) {
                    transitionSystem = this.getParserService().getArcEagerTransitionSystem();
                } else {
                    throw new TalismaneException("Unknown transition system: " + transitionSystemStr);
                }

                if (dependencyLabelPath != null) {
                    File dependencyLabelFile = this.getFile(dependencyLabelPath);
                    Scanner depLabelScanner = new Scanner(new BufferedReader(
                            new InputStreamReader(new FileInputStream(dependencyLabelFile), "UTF-8")));
                    List<String> dependencyLabels = new ArrayListNoNulls<String>();
                    while (depLabelScanner.hasNextLine()) {
                        String dependencyLabel = depLabelScanner.nextLine();
                        if (!dependencyLabel.startsWith("#"))
                            dependencyLabels.add(dependencyLabel);
                    }
                    transitionSystem.setDependencyLabels(dependencyLabels);
                }

                talismaneSession.setTransitionSystem(transitionSystem);
            }

            if (this.lexiconPath != null) {
                File lexiconFile = this.getFile(lexiconPath);
                if (!lexiconFile.exists())
                    throw new TalismaneException("lexicon: File " + lexiconPath + " does not exist");

                LexiconDeserializer lexiconDeserializer = new LexiconDeserializer(talismaneSession);
                List<PosTaggerLexicon> lexicons = lexiconDeserializer.deserializeLexicons(lexiconFile);
                for (PosTaggerLexicon oneLexicon : lexicons) {
                    talismaneSession.addLexicon(oneLexicon);
                }

                if (!replaceLexicon) {
                    List<PosTaggerLexicon> defaultLexicons = this.implementation.getDefaultLexicons();
                    if (defaultLexicons != null) {
                        for (PosTaggerLexicon oneLexicon : defaultLexicons) {
                            talismaneSession.addLexicon(oneLexicon);
                        }
                    }
                }
            }

            if (externalResourcePath != null) {
                externalResourceFinder = this.getMachineLearningService().getExternalResourceFinder();

                List<String> paths = new ArrayListNoNulls<String>();
                if (externalResourcePath != null && externalResourcePath.length() > 0) {
                    LOG.info("externalResourcePath: " + externalResourcePath);
                    String[] parts = externalResourcePath.split(";");
                    for (String part : parts)
                        paths.add(part);
                }

                for (String path : paths) {
                    LOG.info("Reading external resources from " + path);
                    if (path.length() > 0) {
                        File externalResourceFile = this.getFile(path);
                        externalResourceFinder.addExternalResources(externalResourceFile);
                    }
                }

                ExternalResourceFinder parserResourceFinder = this.getParserFeatureService()
                        .getExternalResourceFinder();
                ExternalResourceFinder posTaggerResourceFinder = this.getPosTaggerFeatureService()
                        .getExternalResourceFinder();
                ExternalResourceFinder tokeniserResourceFinder = this.getTokenFeatureService()
                        .getExternalResourceFinder();
                ExternalResourceFinder sentenceResourceFinder = this.getSentenceDetectorFeatureService()
                        .getExternalResourceFinder();
                for (ExternalResource<?> externalResource : externalResourceFinder.getExternalResources()) {
                    parserResourceFinder.addExternalResource(externalResource);
                    posTaggerResourceFinder.addExternalResource(externalResource);
                    tokeniserResourceFinder.addExternalResource(externalResource);
                    sentenceResourceFinder.addExternalResource(externalResource);
                }

                ExternalResourceFinder tokenFilterResourceFinder = this.getTokenFilterService()
                        .getExternalResourceFinder();
                for (ExternalWordList externalWordList : externalResourceFinder.getExternalWordLists()) {
                    tokenFilterResourceFinder.addExternalWordList(externalWordList);
                }
            }
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The command which Talismane has been asked to run.
     * @return the currently configured command
     */
    @Override
    public Command getCommand() {
        return this.command;
    }

    /**
     * Sets the command returned by {@link #getCommand()}.
     * @param command the command to store
     */
    @Override
    public void setCommand(Command command) {
        this.command = command;
    }

    /**
     * The first module to run, for commands that need a start module (e.g. analyse).
     * When the arguments give no start module, the single "module" argument is used if present,
     * and otherwise {@link com.joliciel.talismane.Talismane.Module#SentenceDetector}.
     * @return the configured start module
     */
    @Override
    public Module getStartModule() {
        return this.startModule;
    }

    /**
     * Sets the module returned by {@link #getStartModule()}.
     * @param startModule the start module to store
     */
    @Override
    public void setStartModule(Module startModule) {
        this.startModule = startModule;
    }

    /**
     * The last module to run, for commands that need an end module (e.g. analyse).
     * When the arguments give no end module, the single "module" argument is used if present,
     * and otherwise {@link com.joliciel.talismane.Talismane.Module#Parser}.
     * @return the configured end module
     */
    @Override
    public Module getEndModule() {
        return this.endModule;
    }

    /**
     * Sets the module returned by {@link #getEndModule()}.
     * @param endModule the end module to store
     */
    @Override
    public void setEndModule(Module endModule) {
        this.endModule = endModule;
    }

    /**
     * The module concerned by commands which act on a single module only (e.g. evaluate).
     * When the arguments give no module, the end module is used instead.
     * @return the configured module
     */
    @Override
    public Module getModule() {
        return this.module;
    }

    /**
     * Sets the module returned by {@link #getModule()}.
     * @param module the module to store
     */
    @Override
    public void setModule(Module module) {
        this.module = module;
    }

    /**
     * Whether, when analysing, raw text is processed from the outset (true), or only once
     * a text marker filter indicates that processing should start (false). Default is true.
     * @return true if text is processed by default
     */
    @Override
    public boolean isProcessByDefault() {
        return this.processByDefault;
    }

    /**
     * Sets the flag returned by {@link #isProcessByDefault()}.
     * @param processByDefault the flag value to store
     */
    @Override
    public void setProcessByDefault(boolean processByDefault) {
        this.processByDefault = processByDefault;
    }

    /**
     * The maximum number of sentences to process for the "process" command.
     * A value &lt;= 0 means all sentences are processed. Default is 0 (all).
     * @return the maximum sentence count
     */
    @Override
    public int getMaxSentenceCount() {
        return this.maxSentenceCount;
    }

    /**
     * Sets the value returned by {@link #getMaxSentenceCount()}.
     * @param maxSentenceCount the maximum sentence count to store
     */
    @Override
    public void setMaxSentenceCount(int maxSentenceCount) {
        this.maxSentenceCount = maxSentenceCount;
    }

    /**
     * The charset with which the input stream is decoded.
     * @return the input charset
     */
    @Override
    public Charset getInputCharset() {
        return this.inputCharset;
    }

    /**
     * Sets the charset returned by {@link #getInputCharset()}.
     * @param inputCharset the input charset to store
     */
    @Override
    public void setInputCharset(Charset inputCharset) {
        this.inputCharset = inputCharset;
    }

    /**
     * The charset with which the output stream is encoded.
     * @return the output charset
     */
    @Override
    public Charset getOutputCharset() {
        return this.outputCharset;
    }

    /**
     * Sets the charset returned by {@link #getOutputCharset()}.
     * @param outputCharset the output charset to store
     */
    @Override
    public void setOutputCharset(Charset outputCharset) {
        this.outputCharset = outputCharset;
    }

    /**
     * A (typically non-printing) character which marks a stop in the input stream and sets off analysis.
     * The default value is the form-feed character (code=12).
     * @return the end-of-block character
     */
    @Override
    public char getEndBlockCharacter() {
        return this.endBlockCharacter;
    }

    /**
     * Sets the character returned by {@link #getEndBlockCharacter()}.
     * @param endBlockCharacter the end-of-block character to store
     */
    @Override
    public void setEndBlockCharacter(char endBlockCharacter) {
        this.endBlockCharacter = endBlockCharacter;
    }

    /**
     * The width of the beam used in beam-search analysis. Default is 1.
     * A wider beam increases analysis time in a linear fashion, but typically improves results.
     * @return the beam width
     */
    @Override
    public int getBeamWidth() {
        return this.beamWidth;
    }

    /**
     * Sets the value returned by {@link #getBeamWidth()}.
     * @param beamWidth the beam width to store
     */
    @Override
    public void setBeamWidth(int beamWidth) {
        this.beamWidth = beamWidth;
    }

    /**
     * Whether the full beam of analyses output by one module is passed as input to the next module (true),
     * or only the single best analysis (false).
     * @return true if the full beam is propagated between modules
     */
    @Override
    public boolean isPropagateBeam() {
        return this.propagateBeam;
    }

    /**
     * Sets the flag returned by {@link #isPropagateBeam()}.
     * @param propagateBeam the flag value to store
     */
    @Override
    public void setPropagateBeam(boolean propagateBeam) {
        this.propagateBeam = propagateBeam;
    }

    /**
     * If true, generates a very detailed analysis of how Talismane obtained the results it displays.
     * @return true if the detailed analysis is requested
     */
    @Override
    public boolean isIncludeDetails() {
        return this.includeDetails;
    }

    /**
     * Sets the flag returned by {@link #isIncludeDetails()}.
     * @param includeDetails the flag value to store
     */
    @Override
    public void setIncludeDetails(boolean includeDetails) {
        this.includeDetails = includeDetails;
    }

    /**
     * The reader from which the data for this analysis is read.
     * <p>
     * The reader is built on the first call and cached. Its source is, in order of preference:
     * the file at {@code inFilePath}, the directory at {@code inDirPath} (via a {@code DirectoryReader}),
     * or standard input. All three are decoded with {@link #getInputCharset()}.
     * @return the cached or newly built reader
     * @throws TalismaneException if the input file or directory is missing, or is of the wrong kind
     * @throws RuntimeException wrapping the FileNotFoundException if the input file cannot be opened
     */
    @Override
    public Reader getReader() {
        if (this.reader != null)
            return this.reader;

        if (inFilePath != null) {
            File inFile = this.getFile(inFilePath);
            if (!inFile.exists())
                throw new TalismaneException("inFile does not exist: " + inFilePath);
            if (inFile.isDirectory())
                throw new TalismaneException(
                        "inFile must be a file, not a directory - use inDir instead: " + inFilePath);

            try {
                FileInputStream inStream = new FileInputStream(inFile);
                this.reader = new BufferedReader(new InputStreamReader(inStream, this.getInputCharset()));
            } catch (FileNotFoundException fnfe) {
                LogUtils.logError(LOG, fnfe);
                throw new RuntimeException(fnfe);
            }
        } else if (inDirPath != null) {
            File inDir = this.getFile(inDirPath);
            if (!inDir.exists())
                throw new TalismaneException("inDir does not exist: " + inDirPath);
            if (!inDir.isDirectory())
                throw new TalismaneException(
                        "inDir must be a directory, not a file - use inFile instead: " + inDirPath);

            DirectoryReader directoryReader = new DirectoryReader(inDir, this.getInputCharset());
            // when analysing, the end-of-block character is appended after the newline at each end of file
            String endOfFile = (this.command == Command.analyse) ? "\n" + this.getEndBlockCharacter() : "\n";
            directoryReader.setEndOfFileString(endOfFile);
            this.reader = directoryReader;
        } else {
            // no input file or directory: read from STDIN
            this.reader = new BufferedReader(new InputStreamReader(System.in, this.getInputCharset()));
        }
        return this.reader;
    }

    /**
     * The reader from which the evaluation data is read, when command=compare.
     * <p>
     * Built on the first call from the file at {@code evaluationFilePath}, decoded with
     * {@link #getInputCharset()}, and cached for later calls.
     * @return the cached or newly built evaluation reader
     * @throws RuntimeException wrapping the FileNotFoundException if the evaluation file cannot be opened
     */
    @Override
    public Reader getEvaluationReader() {
        if (this.evaluationReader != null)
            return this.evaluationReader;

        try {
            File evaluationFile = this.getFile(evaluationFilePath);
            FileInputStream evaluationStream = new FileInputStream(evaluationFile);
            this.evaluationReader = new BufferedReader(
                    new InputStreamReader(evaluationStream, this.getInputCharset()));
        } catch (FileNotFoundException fnfe) {
            LogUtils.logError(LOG, fnfe);
            throw new RuntimeException(fnfe);
        }
        return this.evaluationReader;
    }

    /**
     * The writer to which Talismane writes its output when analysing.
     * <p>
     * Built on the first call and cached. The target is, in order of preference:
     * the file at {@code outFilePath} (recreated empty, with its parent directories);
     * a {@code DirectoryWriter} into {@code outDirPath}, when input is read from {@code inDirPath}
     * through a {@code CurrentFileProvider} and the command is not evaluate;
     * or standard output. All are encoded with {@link #getOutputCharset()}.
     * @return the cached or newly built writer
     * @throws RuntimeException wrapping any exception raised while building the writer
     */
    @Override
    public Writer getWriter() {
        try {
            if (writer != null)
                return writer;

            if (outFilePath != null) {
                File outFile = this.getFile(outFilePath);
                File parentDir = outFile.getParentFile();
                if (parentDir != null)
                    parentDir.mkdirs();

                // start from a fresh, empty file
                outFile.delete();
                outFile.createNewFile();

                FileOutputStream outStream = new FileOutputStream(outFile);
                writer = new BufferedWriter(new OutputStreamWriter(outStream, this.getOutputCharset()));
            } else if (outDirPath != null && inDirPath != null
                    && (this.getReader() instanceof CurrentFileProvider) && command != Command.evaluate) {
                File targetDir = this.getFile(outDirPath);
                targetDir.mkdirs();
                File sourceDir = this.getFile(inDirPath);

                if (this.suffix == null)
                    this.suffix = "";
                DirectoryWriter directoryWriter = new DirectoryWriter(sourceDir, targetDir, suffix,
                        this.getOutputCharset());
                this.writer = directoryWriter;

                // the directory writer observes the reader's current-file changes
                CurrentFileProvider fileProvider = (CurrentFileProvider) this.getReader();
                fileProvider.addCurrentFileObserver(directoryWriter);
            } else {
                // no output file or directory: write to STDOUT
                writer = new BufferedWriter(new OutputStreamWriter(System.out, this.getOutputCharset()));
            }
            return writer;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The file name to apply to this analysis (if the file name is included in the output).
     * When the arguments give no file name but do give an input file, the input file path is used.
     * @return the file name for this analysis
     */
    @Override
    public String getFileName() {
        return this.fileName;
    }

    /**
     * The directory into which any output files are written.
     * <p>
     * Resolved from {@code outDirPath} if set, otherwise from the parent of {@code outFilePath} if set.
     * Whenever a directory is resolved, {@code mkdirs()} is called on it. If neither path is set,
     * the current value of the {@code outDir} field is returned unchanged.
     * @return the output directory
     */
    @Override
    public File getOutDir() {
        if (outDirPath != null) {
            this.outDir = this.getFile(outDirPath);
            this.outDir.mkdirs();
            return this.outDir;
        }

        if (outFilePath != null) {
            this.outDir = this.getFile(outFilePath).getParentFile();
            if (this.outDir != null)
                this.outDir.mkdirs();
        }
        return this.outDir;
    }

    /**
     * The rules to apply when running the pos-tagger.
     * <p>
     * Loaded on the first call and cached. Two sources are read in order: the implementation's
     * default rules (skipped when {@code posTaggerRulesReplace} is set), then the file at
     * {@code posTaggerRuleFilePath}, if any. Empty lines are ignored; every other line is
     * passed as a rule descriptor to the pos-tagger feature service.
     * <p>
     * Each scanner is closed once its lines have been read, and the cached list is only assigned
     * after all sources have loaded successfully, so a failed call never leaves a partial list cached.
     * @return the pos-tagger rules
     * @throws RuntimeException wrapping any exception raised while loading the rules
     *         (including a TalismaneException if the rule file does not exist)
     */
    @Override
    public List<PosTaggerRule> getPosTaggerRules() {
        try {
            if (posTaggerRules == null) {
                // collect into a local list first: if loading fails, the field stays null
                // and the next call retries instead of returning a half-populated list
                List<PosTaggerRule> loadedRules = new ArrayListNoNulls<PosTaggerRule>();
                for (int i = 0; i <= 1; i++) {
                    Scanner rulesScanner = null;
                    if (i == 0) {
                        // pass 0: default rules, unless the user asked to replace them
                        if (posTaggerRulesReplace)
                            continue;
                        rulesScanner = this.implementation.getDefaultPosTaggerRulesScanner();
                    } else if (posTaggerRuleFilePath != null && posTaggerRuleFilePath.length() > 0) {
                        // pass 1: user-supplied rule file
                        File posTaggerRuleFile = this.getFile(posTaggerRuleFilePath);
                        if (!posTaggerRuleFile.exists()) {
                            throw new TalismaneException(
                                    "posTaggerRules: File " + posTaggerRuleFilePath + " does not exist");
                        }
                        rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                new FileInputStream(posTaggerRuleFile), this.getInputCharset())));
                    }

                    if (rulesScanner != null) {
                        List<String> ruleDescriptors = new ArrayListNoNulls<String>();
                        try {
                            while (rulesScanner.hasNextLine()) {
                                String ruleDescriptor = rulesScanner.nextLine();
                                if (ruleDescriptor.length() > 0) {
                                    ruleDescriptors.add(ruleDescriptor);
                                    LOG.trace(ruleDescriptor);
                                }
                            }
                        } finally {
                            // release the underlying stream whether or not reading succeeded
                            rulesScanner.close();
                        }
                        List<PosTaggerRule> rules = this.getPosTaggerFeatureService().getRules(ruleDescriptors);
                        loadedRules.addAll(rules);
                    }
                }
                posTaggerRules = loadedRules;
            }
            return posTaggerRules;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The rules to apply when running the parser.
     * <p>
     * Loaded on the first call and cached. If {@code parserRuleFilePath} is the literal string
     * "null" (case-insensitive), no rules at all are loaded, not even the built-in ones.
     * Otherwise two sources are read in order: the implementation's default rules (skipped when
     * {@code parserRulesReplace} is set), then the file at {@code parserRuleFilePath}, if any.
     * Empty lines are ignored; every other line is passed as a rule descriptor to the parser
     * feature service, together with the {@code dynamiseFeatures} flag.
     * <p>
     * Each scanner is closed once its lines have been read, and the cached list is only assigned
     * after all sources have loaded successfully, so a failed call never leaves a partial list cached.
     * @return the parser rules
     * @throws RuntimeException wrapping any exception raised while loading the rules
     *         (including a TalismaneException if the rule file does not exist)
     */
    @Override
    public List<ParserRule> getParserRules() {
        try {
            if (parserRules == null) {
                // collect into a local list first: if loading fails, the field stays null
                // and the next call retries instead of returning a half-populated list
                List<ParserRule> loadedRules = new ArrayListNoNulls<ParserRule>();
                boolean noRules = parserRuleFilePath != null && parserRuleFilePath.equalsIgnoreCase("null");
                if (!noRules) {
                    for (int i = 0; i <= 1; i++) {
                        Scanner rulesScanner = null;
                        if (i == 0) {
                            // pass 0: default rules, unless the user asked to replace them
                            if (parserRulesReplace)
                                continue;
                            rulesScanner = this.implementation.getDefaultParserRulesScanner();
                        } else if (parserRuleFilePath != null && parserRuleFilePath.length() > 0) {
                            // pass 1: user-supplied rule file
                            File parserRuleFile = this.getFile(parserRuleFilePath);
                            if (!parserRuleFile.exists()) {
                                throw new TalismaneException(
                                        "parserRules: File " + parserRuleFilePath + " does not exist");
                            }
                            rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                    new FileInputStream(parserRuleFile), this.getInputCharset())));
                        }

                        if (rulesScanner != null) {
                            List<String> ruleDescriptors = new ArrayListNoNulls<String>();
                            try {
                                while (rulesScanner.hasNextLine()) {
                                    String ruleDescriptor = rulesScanner.nextLine();
                                    if (ruleDescriptor.length() > 0) {
                                        ruleDescriptors.add(ruleDescriptor);
                                        LOG.trace(ruleDescriptor);
                                    }
                                }
                            } finally {
                                // release the underlying stream whether or not reading succeeded
                                rulesScanner.close();
                            }
                            List<ParserRule> rules = this.getParserFeatureService().getRules(ruleDescriptors,
                                    dynamiseFeatures);
                            loadedRules.addAll(rules);
                        }
                    }
                }
                parserRules = loadedRules;
            }
            return parserRules;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * A regex used to process the input, when pre-annotated.
     * <p>
     * If no regex has been set yet and an input pattern file path is configured,
     * the first line of that file (decoded with the input charset) is read and
     * cached as the regex.
     * 
     * @return the input regex, or null if none was set and no input pattern file
     *         path is configured
     * @throws RuntimeException wrapping any failure while reading the file,
     *         including a TalismaneException if the file contains no line
     */
    @Override
    public String getInputRegex() {
        try {
            if (inputRegex == null && inputPatternFilePath != null && inputPatternFilePath.length() > 0) {
                File inputPatternFile = this.getFile(inputPatternFilePath);
                Scanner inputPatternScanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(inputPatternFile), this.getInputCharset().name())));
                try {
                    if (inputPatternScanner.hasNextLine()) {
                        inputRegex = inputPatternScanner.nextLine();
                    }
                } finally {
                    // release the file handle even if reading fails
                    inputPatternScanner.close();
                }
                if (inputRegex == null)
                    throw new TalismaneException("No input pattern found in " + inputPatternFilePath);
            }
            return inputRegex;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Sets the regex used to process pre-annotated input. A non-null value
     * prevents {@link #getInputRegex()} from reading the input pattern file.
     * 
     * @param inputRegex the regex to use
     */
    public void setInputRegex(String inputRegex) {
        this.inputRegex = inputRegex;
    }

    /**
     * A regex used to process the evaluation corpus.
     * <p>
     * If not yet set, it is read from the first line of the evaluation pattern
     * file when a path is configured; otherwise it falls back to
     * {@link #getInputRegex()}. The result is cached.
     * 
     * @return the evaluation regex, possibly null if the fallback to the input
     *         regex yields null
     * @throws RuntimeException wrapping any failure while reading the file,
     *         including a TalismaneException if the file contains no line
     */
    @Override
    public String getEvaluationRegex() {
        try {
            if (evaluationRegex == null) {
                if (evaluationPatternFilePath != null && evaluationPatternFilePath.length() > 0) {
                    File evaluationPatternFile = this.getFile(evaluationPatternFilePath);
                    Scanner evaluationPatternScanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(evaluationPatternFile), this.getInputCharset().name())));
                    try {
                        if (evaluationPatternScanner.hasNextLine()) {
                            evaluationRegex = evaluationPatternScanner.nextLine();
                        }
                    } finally {
                        // release the file handle even if reading fails
                        evaluationPatternScanner.close();
                    }
                    if (evaluationRegex == null)
                        throw new TalismaneException("No evaluation pattern found in " + evaluationPatternFilePath);
                } else {
                    evaluationRegex = this.getInputRegex();
                }
            }
            return evaluationRegex;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Text marker filters are applied to raw text segments extracted from the stream, 3 segments at a time.
     * This means that if a particular marker crosses segment borders, it is handled correctly.
     * <p>
     * On first call the list is built in this order: a sentence-break filter on
     * the end-of-block character, the newline handling requested by
     * newlineMarker, the duplicate white-space filter, then the filters
     * described in each file of textFiltersPath (semicolon-separated) and,
     * unless textFiltersReplace is set, the implementation's default filters.
     * Descriptor lines that are empty or start with "#" are skipped.
     * 
     * @return the cached list of text marker filters
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if a listed file does not exist
     */
    @Override
    public List<TextMarkerFilter> getTextMarkerFilters() {
        try {
            if (textMarkerFilters == null) {
                textMarkerFilters = new ArrayListNoNulls<TextMarkerFilter>();

                // insert sentence breaks at end of block
                this.addTextMarkerFilter(this.getFilterService().getRegexMarkerFilter(
                        new MarkerFilterType[] { MarkerFilterType.SENTENCE_BREAK }, "" + endBlockCharacter,
                        blockSize));

                // handle newline as requested
                if (newlineMarker.equals(MarkerFilterType.SENTENCE_BREAK))
                    this.addTextMarkerFilter(this.getFilterService().getNewlineEndOfSentenceMarker());
                else if (newlineMarker.equals(MarkerFilterType.SPACE))
                    this.addTextMarkerFilter(this.getFilterService().getNewlineSpaceMarker());

                // get rid of duplicate white-space always
                this.addTextMarkerFilter(this.getFilterService().getDuplicateWhiteSpaceFilter());

                List<String> paths = new ArrayListNoNulls<String>();
                if (textFiltersPath != null && textFiltersPath.length() > 0) {
                    LOG.debug("textFiltersPath: " + textFiltersPath);
                    String[] parts = textFiltersPath.split(";");
                    for (String part : parts) {
                        paths.add(part);
                    }
                }
                if (!textFiltersReplace) {
                    // default text filter path
                    paths.add("");
                }

                for (String path : paths) {
                    LOG.debug("Text marker filters");
                    Scanner textFilterScanner = null;
                    if (path.length() > 0) {
                        LOG.debug("From: " + path);
                        File textFilterFile = this.getFile(path);
                        if (!textFilterFile.exists()) {
                            throw new TalismaneException("textFilters: File " + path + " does not exist");
                        }
                        textFilterScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                new FileInputStream(textFilterFile), this.getInputCharset().name())));
                    } else {
                        // an empty path stands for the implementation's default filters
                        LOG.debug("From default");
                        textFilterScanner = this.implementation.getDefaultTextMarkerFiltersScanner();
                    }
                    if (textFilterScanner != null) {
                        try {
                            while (textFilterScanner.hasNextLine()) {
                                String descriptor = textFilterScanner.nextLine();
                                LOG.debug(descriptor);
                                if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                    TextMarkerFilter textMarkerFilter = this.getFilterService()
                                            .getTextMarkerFilter(descriptor, blockSize);
                                    this.addTextMarkerFilter(textMarkerFilter);
                                }
                            }
                        } finally {
                            // the scanner is fully consumed (or failed) at this point;
                            // close it so the underlying file handle is released
                            textFilterScanner.close();
                        }
                    }
                }

            }
            return textMarkerFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the list of text marker filters. A non-null list prevents
     * {@link #getTextMarkerFilters()} from building its own list.
     * 
     * @param textMarkerFilters the filters to use
     */
    @Override
    public void setTextMarkerFilters(List<TextMarkerFilter> textMarkerFilters) {
        this.textMarkerFilters = textMarkerFilters;
    }

    /**
     * Appends a filter to the current list of text marker filters.
     * <p>
     * NOTE(review): the list is dereferenced without a null check; in the
     * visible code it is only created by getTextMarkerFilters() or supplied via
     * setTextMarkerFilters() - confirm callers never invoke this method first.
     * 
     * @param textMarkerFilter the filter to append
     */
    @Override
    public void addTextMarkerFilter(TextMarkerFilter textMarkerFilter) {
        this.textMarkerFilters.add(textMarkerFilter);
    }

    /**
     * Token sequence filters to be applied during analysis.
     * <p>
     * On first call, descriptors are read from each file listed in
     * tokenSequenceFilterPath (semicolon-separated) and then, unless
     * tokenSequenceFiltersReplace is set, from the model's pre-processing
     * filter descriptors when a model is given, or from the implementation's
     * defaults otherwise. Every line read is recorded in the descriptors map;
     * lines that are empty or start with "#" do not produce a filter.
     * 
     * @param model the model whose descriptors are used instead of the defaults; may be null
     * @return the cached list of token sequence filters
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if a listed file does not exist
     */
    private List<TokenSequenceFilter> getTokenSequenceFilters(MachineLearningModel model) {
        try {
            if (tokenSequenceFilters == null) {
                List<String> tokenSequenceFilterDescriptors = new ArrayListNoNulls<String>();
                tokenSequenceFilters = new ArrayListNoNulls<TokenSequenceFilter>();

                LOG.debug("Token sequence filters");

                List<Scanner> scanners = new ArrayListNoNulls<Scanner>();
                try {
                    if (tokenSequenceFilterPath != null && tokenSequenceFilterPath.length() > 0) {
                        LOG.debug("tokenSequenceFilterPath: " + tokenSequenceFilterPath);
                        String[] parts = tokenSequenceFilterPath.split(";");
                        for (String part : parts) {
                            if (part.length() > 0) {
                                LOG.debug("From: " + part);
                                File tokenSequenceFilterFile = this.getFile(part);
                                if (!tokenSequenceFilterFile.exists()) {
                                    throw new TalismaneException(
                                            "tokenSequenceFilters: File " + part + " does not exist");
                                }
                                Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                                        new FileInputStream(tokenSequenceFilterFile), this.getInputCharset())));
                                scanners.add(scanner);
                            }
                        }
                    }
                    if (!tokenSequenceFiltersReplace) {
                        if (model != null) {
                            LOG.debug("From model");
                            List<String> modelDescriptors = model.getDescriptors()
                                    .get(PosTagFilterService.POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY);
                            StringBuilder modelDescriptorString = new StringBuilder();
                            if (modelDescriptors != null) {
                                for (String descriptor : modelDescriptors) {
                                    modelDescriptorString.append(descriptor).append("\n");
                                }
                            }
                            Scanner scanner = new Scanner(modelDescriptorString.toString());
                            scanners.add(scanner);
                        } else {
                            // default token filters
                            LOG.debug("From default");
                            Scanner scanner = this.implementation.getDefaultTokenSequenceFiltersScanner();
                            scanners.add(scanner);
                        }
                    }

                    for (Scanner scanner : scanners) {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            LOG.debug(descriptor);
                            // every line, including comments, is kept as a descriptor
                            tokenSequenceFilterDescriptors.add(descriptor);
                            if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                TokenSequenceFilter tokenSequenceFilter = this.getTokenFilterService()
                                        .getTokenSequenceFilter(descriptor);
                                if (tokenSequenceFilter instanceof NeedsTalismaneSession)
                                    ((NeedsTalismaneSession) tokenSequenceFilter).setTalismaneSession(talismaneSession);
                                tokenSequenceFilters.add(tokenSequenceFilter);
                            }
                        }
                    }
                } finally {
                    // release all file handles, whether or not reading succeeded
                    for (Scanner openScanner : scanners) {
                        openScanner.close();
                    }
                }

                this.getDescriptors().put(PosTagFilterService.POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY,
                        tokenSequenceFilterDescriptors);
            }
            return tokenSequenceFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Pos-tag sequence filters to be applied after pos-tagging.
     * <p>
     * On first call, descriptors are read from the file at
     * posTagSequenceFilterPath if it is set; otherwise from the model's
     * post-processing filter descriptors, if a model is given and has any.
     * Every line read is recorded in the descriptors map; lines that are empty
     * or start with "#" do not produce a filter.
     * 
     * @param model the model providing descriptors when no path is set; may be null
     * @return the cached list of pos-tag sequence filters
     * @throws RuntimeException wrapping any failure while reading or building the filters
     */
    private List<PosTagSequenceFilter> getPosTagSequenceFilters(MachineLearningModel model) {
        try {
            if (posTaggerPostProcessingFilters == null) {
                List<String> posTaggerPostProcessingFilterDescriptors = new ArrayListNoNulls<String>();
                posTaggerPostProcessingFilters = new ArrayListNoNulls<PosTagSequenceFilter>();

                List<Scanner> scanners = new ArrayListNoNulls<Scanner>();
                try {
                    if (posTagSequenceFilterPath != null) {
                        File filterFile = this.getFile(posTagSequenceFilterPath);
                        Scanner scanner = new Scanner(new BufferedReader(
                                new InputStreamReader(new FileInputStream(filterFile), this.getInputCharset())));
                        scanners.add(scanner);
                    } else if (model != null) {
                        List<String> modelDescriptors = model.getDescriptors()
                                .get(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY);
                        if (modelDescriptors != null) {
                            StringBuilder modelDescriptorString = new StringBuilder();
                            for (String descriptor : modelDescriptors) {
                                modelDescriptorString.append(descriptor).append("\n");
                            }
                            Scanner scanner = new Scanner(modelDescriptorString.toString());
                            scanners.add(scanner);
                        }
                    }

                    for (Scanner scanner : scanners) {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            LOG.debug(descriptor);
                            // every line, including comments, is kept as a descriptor
                            posTaggerPostProcessingFilterDescriptors.add(descriptor);
                            if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                PosTagSequenceFilter filter = this.getPosTagFilterService()
                                        .getPosTagSequenceFilter(descriptor);
                                posTaggerPostProcessingFilters.add(filter);
                            }
                        }
                    }
                } finally {
                    // release the file handle, whether or not reading succeeded
                    for (Scanner openScanner : scanners) {
                        openScanner.close();
                    }
                }

                this.getDescriptors().put(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY,
                        posTaggerPostProcessingFilterDescriptors);

            }
            return posTaggerPostProcessingFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * TokenFilters to be applied during analysis.
     * <p>
     * On first call the list is built in this order: the prepended token
     * filters, the filters read from each file listed in tokenFiltersPath
     * (semicolon-separated), then, unless tokenFiltersReplace is set, the
     * filters described by the model when one is given or the implementation's
     * defaults otherwise, and finally the additional token filters. The
     * descriptors collected while reading are recorded in the descriptors map.
     * 
     * @param model the model whose descriptors are used instead of the defaults; may be null
     * @return the cached list of token filters
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if a listed file does not exist
     */
    private List<TokenFilter> getTokenFilters(MachineLearningModel model) {
        try {
            if (tokenFilters == null) {
                List<String> tokenFilterDescriptors = new ArrayListNoNulls<String>();
                tokenFilters = new ArrayListNoNulls<TokenFilter>();

                LOG.debug("Token filters");

                for (TokenFilter tokenFilter : this.prependedTokenFilters)
                    this.tokenFilters.add(tokenFilter);

                List<Scanner> scanners = new ArrayListNoNulls<Scanner>();
                try {
                    if (tokenFiltersPath != null && tokenFiltersPath.length() > 0) {
                        LOG.debug("tokenFiltersPath: " + tokenFiltersPath);
                        String[] parts = tokenFiltersPath.split(";");
                        for (String part : parts) {
                            if (part.length() > 0) {
                                LOG.debug("From: " + part);
                                File tokenFilterFile = this.getFile(part);
                                if (!tokenFilterFile.exists()) {
                                    throw new TalismaneException("tokenFilters: File " + part + " does not exist");
                                }
                                Scanner tokenFilterScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                        new FileInputStream(tokenFilterFile), this.getInputCharset())));
                                scanners.add(tokenFilterScanner);
                            }
                        }
                    }
                    if (!tokenFiltersReplace) {
                        if (model != null) {
                            LOG.debug("From model");
                            List<String> modelDescriptors = model.getDescriptors()
                                    .get(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY);
                            StringBuilder modelDescriptorString = new StringBuilder();
                            if (modelDescriptors != null) {
                                for (String descriptor : modelDescriptors) {
                                    modelDescriptorString.append(descriptor).append("\n");
                                }
                            }
                            Scanner scanner = new Scanner(modelDescriptorString.toString());
                            scanners.add(scanner);
                        } else {
                            // default token filters
                            LOG.debug("From default");
                            Scanner tokenFilterScanner = this.implementation.getDefaultTokenFiltersScanner();
                            scanners.add(tokenFilterScanner);
                        }
                    }

                    for (Scanner scanner : scanners) {
                        List<TokenFilter> myFilters = this.getTokenFilterService().readTokenFilters(scanner,
                                tokenFilterDescriptors);
                        for (TokenFilter tokenFilter : myFilters) {
                            tokenFilters.add(tokenFilter);
                        }
                    }
                } finally {
                    // release all file handles, whether or not reading succeeded
                    for (Scanner openScanner : scanners) {
                        openScanner.close();
                    }
                }
                this.getDescriptors().put(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY, tokenFilterDescriptors);

                for (TokenFilter tokenFilter : this.additionalTokenFilters)
                    this.tokenFilters.add(tokenFilter);
            }
            return tokenFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The language detector to use for analysis.
     * <p>
     * Built on first call from the model file at languageModelFilePath and
     * cached. No fallback model is attempted here: a missing path is an error.
     * 
     * @return the cached language detector
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if no language model path is configured or the
     *         file does not exist
     */
    @Override
    public LanguageDetector getLanguageDetector() {
        try {
            if (languageDetector == null) {
                LOG.debug("Getting language detector model");
                if (languageModelFilePath == null) {
                    // message previously read "with languageModel", contradicting the condition
                    throw new TalismaneException("Cannot detect languages without languageModel");
                }
                File languageModelFile = this.getFile(languageModelFilePath);
                if (!languageModelFile.exists())
                    throw new TalismaneException("Could not find languageModel at: " + languageModelFilePath);
                ClassificationModel<LanguageOutcome> languageModel = this.getMachineLearningService()
                        .getClassificationModel(new ZipInputStream(new FileInputStream(languageModelFile)));
                languageDetector = this.getLanguageDetectorService().getLanguageDetector(languageModel);
            }
            return languageDetector;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The sentence detector to use for analysis.
     * <p>
     * Built once and cached. The model comes from the file at
     * sentenceModelFilePath when that path is set, and from the
     * implementation's default sentence model otherwise.
     * 
     * @return the cached sentence detector
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if the model file is missing or no model is available
     */
    @Override
    public SentenceDetector getSentenceDetector() {
        try {
            if (sentenceDetector != null)
                return sentenceDetector;

            LOG.debug("Getting sentence detector model");
            ClassificationModel<SentenceDetectorOutcome> model;
            if (sentenceModelFilePath == null) {
                // no explicit model: rely on the implementation's default
                model = this.implementation.getDefaultSentenceModel();
                if (model == null)
                    throw new TalismaneException("No sentenceModel provided");
            } else {
                File modelFile = this.getFile(sentenceModelFilePath);
                if (!modelFile.exists())
                    throw new TalismaneException("Could not find sentenceModel at: " + sentenceModelFilePath);

                model = this.getMachineLearningService()
                        .getClassificationModel(new ZipInputStream(new FileInputStream(modelFile)));
            }
            sentenceDetector = this.getSentenceDetectorService().getSentenceDetector(model);
            return sentenceDetector;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The tokeniser to use for analysis.
     * <p>
     * Built once and cached: either the simple tokeniser or a pattern tokeniser
     * backed by the tokeniser model, depending on tokeniserType. The token
     * filters and token sequence filters are then attached to it; each token
     * filter is also attached to the sentence detector when one is needed.
     * 
     * @return the cached tokeniser
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException for a missing model or unknown tokeniser type
     */
    @Override
    public Tokeniser getTokeniser() {
        try {
            if (tokeniser != null)
                return tokeniser;

            // stays null for the simple tokeniser, which has no model
            ClassificationModel<TokeniserOutcome> model = null;
            if (tokeniserType == TokeniserType.pattern) {
                LOG.debug("Getting tokeniser model");
                model = this.getTokeniserModel();
                if (model == null)
                    throw new TalismaneException("No tokeniserModel provided");

                tokeniser = this.getTokeniserPatternService().getPatternTokeniser(model, tokeniserBeamWidth);

                if (includeDetails) {
                    String detailsFileName = this.getBaseName() + "_tokeniser_details.txt";
                    File detailsOutput = new File(this.getOutDir(), detailsFileName);
                    // start from a fresh details file
                    detailsOutput.delete();
                    ClassificationObserver<TokeniserOutcome> detailsObserver = model
                            .getDetailedAnalysisObserver(detailsOutput);
                    tokeniser.addObserver(detailsObserver);
                }
            } else if (tokeniserType == TokeniserType.simple) {
                tokeniser = this.getTokeniserService().getSimpleTokeniser();
            } else {
                throw new TalismaneException("Unknown tokeniserType: " + tokeniserType);
            }

            List<TokenFilter> filters = this.getTokenFilters(model);
            for (TokenFilter filter : filters) {
                tokeniser.addTokenFilter(filter);
                if (this.needsSentenceDetector()) {
                    this.getSentenceDetector().addTokenFilter(filter);
                }
            }

            List<TokenSequenceFilter> sequenceFilters = this.getTokenSequenceFilters(model);
            for (TokenSequenceFilter sequenceFilter : sequenceFilters) {
                tokeniser.addTokenSequenceFilter(sequenceFilter);
            }
            return tokeniser;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The tokeniser model, loaded on first call and cached: read from the file
     * at tokeniserModelFilePath when that path is set, otherwise taken from the
     * implementation's default tokeniser model.
     * 
     * @return the tokeniser model, as provided by the file or the implementation default
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if the model file does not exist
     */
    ClassificationModel<TokeniserOutcome> getTokeniserModel() {
        try {
            if (tokeniserModel != null)
                return tokeniserModel;

            if (tokeniserModelFilePath == null) {
                tokeniserModel = this.implementation.getDefaultTokeniserModel();
                return tokeniserModel;
            }

            File modelFile = this.getFile(tokeniserModelFilePath);
            if (!modelFile.exists())
                throw new TalismaneException("Could not find tokeniserModel at: " + tokeniserModelFilePath);

            tokeniserModel = this.getMachineLearningService()
                    .getClassificationModel(new ZipInputStream(new FileInputStream(modelFile)));
            return tokeniserModel;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The pos-tagger model, loaded on first call and cached: read from the file
     * at posTaggerModelFilePath when that path is set, otherwise taken from the
     * implementation's default pos-tagger model.
     * 
     * @return the pos-tagger model, as provided by the file or the implementation default
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if the model file does not exist
     */
    ClassificationModel<PosTag> getPosTaggerModel() {
        try {
            if (posTaggerModel != null)
                return posTaggerModel;

            if (posTaggerModelFilePath == null) {
                posTaggerModel = this.implementation.getDefaultPosTaggerModel();
                return posTaggerModel;
            }

            File modelFile = this.getFile(posTaggerModelFilePath);
            if (!modelFile.exists())
                throw new TalismaneException("Could not find posTaggerModel at: " + posTaggerModelFilePath);

            posTaggerModel = this.getMachineLearningService()
                    .getClassificationModel(new ZipInputStream(new FileInputStream(modelFile)));
            return posTaggerModel;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The parser model, loaded on first call and cached: read from the file at
     * parserModelFilePath when that path is set, otherwise taken from the
     * implementation's default parser model.
     * 
     * @return the parser model, as provided by the file or the implementation default
     * @throws RuntimeException wrapping any failure, including a
     *         TalismaneException if the model file does not exist
     */
    MachineLearningModel getParserModel() {
        try {
            if (parserModel != null)
                return parserModel;

            if (parserModelFilePath == null) {
                parserModel = this.implementation.getDefaultParserModel();
                return parserModel;
            }

            File modelFile = this.getFile(parserModelFilePath);
            if (!modelFile.exists())
                throw new TalismaneException("Could not find parserModel at: " + parserModelFilePath);

            parserModel = this.getMachineLearningService()
                    .getClassificationModel(new ZipInputStream(new FileInputStream(modelFile)));
            return parserModel;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The tokeniser pattern manager, built on first call from the pattern
     * descriptors in the file at tokeniserPatternFilePath and cached. Every
     * line of the file is kept as a descriptor and recorded in the descriptors map.
     * 
     * @return the cached tokeniser pattern manager
     * @throws RuntimeException if the tokeniser pattern file path is null or
     *         empty, or wrapping any failure while reading the file or
     *         building the manager
     */
    @Override
    public TokeniserPatternManager getTokeniserPatternManager() {
        if (tokeniserPatternManager == null) {
            // a null path is reported like an empty one, rather than surfacing as a NullPointerException
            if (tokeniserPatternFilePath == null || tokeniserPatternFilePath.length() == 0)
                throw new RuntimeException("Missing argument: tokeniserPatterns");
            try {
                File tokeniserPatternFile = this.getFile(tokeniserPatternFilePath);
                Scanner scanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(tokeniserPatternFile), this.getInputCharset())));
                List<String> patternDescriptors = new ArrayListNoNulls<String>();
                try {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        patternDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                } finally {
                    // release the file handle even if reading fails
                    scanner.close();
                }

                this.getDescriptors().put(TokeniserPatternService.PATTERN_DESCRIPTOR_KEY, patternDescriptors);

                tokeniserPatternManager = this.getTokeniserPatternService().getPatternManager(patternDescriptors);
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return tokeniserPatternManager;
    }

    /**
     * The language detector features, read on first call from the feature
     * descriptor file at languageFeaturePath and cached. Every line of the file
     * is kept as a descriptor and recorded in the descriptors map.
     * 
     * @return the feature set, or null if no language feature path is configured
     * @throws RuntimeException wrapping any failure while reading the file or building the features
     */
    @Override
    public Set<LanguageDetectorFeature<?>> getLanguageDetectorFeatures() {
        if (languageFeatures == null) {
            try {
                if (languageFeaturePath != null) {
                    LOG.debug("Found setting to change language detector features");
                    File languageFeatureFile = this.getFile(languageFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(languageFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayListNoNulls<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner was previously never closed, leaking the file handle
                        scanner.close();
                    }
                    languageFeatures = this.getLanguageDetectorService().getFeatureSet(featureDescriptors);
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return languageFeatures;
    }

    /**
     * The sentence detector features, read on first call from the feature
     * descriptor file at sentenceFeaturePath and cached. Every line of the file
     * is kept as a descriptor and recorded in the descriptors map.
     * 
     * @return the feature set, or null if no sentence feature path is configured
     * @throws RuntimeException wrapping any failure while reading the file or building the features
     */
    @Override
    public Set<SentenceDetectorFeature<?>> getSentenceDetectorFeatures() {
        if (sentenceFeatures == null) {
            try {
                if (sentenceFeaturePath != null) {
                    LOG.debug("Found setting to change sentence detector features");
                    File sentenceFeatureFile = this.getFile(sentenceFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(sentenceFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayListNoNulls<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner was previously never closed, leaking the file handle
                        scanner.close();
                    }
                    sentenceFeatures = this.getSentenceDetectorFeatureService().getFeatureSet(featureDescriptors);
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return sentenceFeatures;
    }

    /**
     * The tokeniser context features, read on first call from the feature
     * descriptor file at tokeniserFeaturePath and cached. The features are
     * built against the parsed test patterns of the tokeniser pattern manager.
     * Every line of the file is kept as a descriptor and recorded in the
     * descriptors map.
     * 
     * @return the feature set, or null if no tokeniser feature path is configured
     * @throws RuntimeException wrapping any failure while obtaining the pattern
     *         manager, reading the file or building the features
     */
    @Override
    public Set<TokeniserContextFeature<?>> getTokeniserContextFeatures() {
        if (tokeniserContextFeatures == null) {
            try {
                if (tokeniserFeaturePath != null) {
                    TokeniserPatternManager tokeniserPatternManager = this.getTokeniserPatternManager();
                    LOG.debug("Found setting to change tokeniser context features");
                    File tokeniserFeatureFile = this.getFile(tokeniserFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayListNoNulls<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner was previously never closed, leaking the file handle
                        scanner.close();
                    }
                    tokeniserContextFeatures = this.getTokenFeatureService().getTokeniserContextFeatureSet(
                            featureDescriptors, tokeniserPatternManager.getParsedTestPatterns());
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return tokeniserContextFeatures;
    }

    /**
     * The token pattern match features, read on first call from the feature
     * descriptor file at tokeniserFeaturePath and cached. Every line of the
     * file is kept as a descriptor and recorded in the descriptors map.
     * 
     * @return the feature set, or null if no tokeniser feature path is configured
     * @throws RuntimeException wrapping any failure while reading the file or building the features
     */
    @Override
    public Set<TokenPatternMatchFeature<?>> getTokenPatternMatchFeatures() {
        if (tokenPatternMatchFeatures == null) {
            try {
                if (tokeniserFeaturePath != null) {
                    LOG.debug("Found setting to change token pattern match features");
                    File tokeniserFeatureFile = this.getFile(tokeniserFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayListNoNulls<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner was previously never closed, leaking the file handle
                        scanner.close();
                    }
                    tokenPatternMatchFeatures = this.getTokenFeatureService()
                            .getTokenPatternMatchFeatureSet(featureDescriptors);
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return tokenPatternMatchFeatures;
    }

    /**
     * The pos-tagger features, read on first call from the feature descriptor
     * file at posTaggerFeaturePath and cached. Every line of the file is kept
     * as a descriptor and recorded in the descriptors map.
     * 
     * @return the feature set, or null if no pos-tagger feature path is configured
     * @throws RuntimeException wrapping any failure while reading the file or building the features
     */
    @Override
    public Set<PosTaggerFeature<?>> getPosTaggerFeatures() {
        if (posTaggerFeatures == null) {
            try {
                if (posTaggerFeaturePath != null) {
                    LOG.debug("Found setting to change pos-tagger features");
                    File posTaggerFeatureFile = this.getFile(posTaggerFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(posTaggerFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayListNoNulls<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner was previously never closed, leaking the file handle
                        scanner.close();
                    }
                    posTaggerFeatures = this.getPosTaggerFeatureService().getFeatureSet(featureDescriptors);
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return posTaggerFeatures;
    }

    /**
     * Returns the classification event stream for the current module,
     * building it on the first call and reusing it afterwards.
     * <p>
     * For the tokeniser, the kind of stream depends on patternTokeniserType:
     * an interval pattern stream for PatternTokeniserType.Interval, a compound
     * pattern stream otherwise.
     *
     * @return the event stream for the module returned by getModule()
     * @throws TalismaneException if the module has no event stream support
     */
    @Override
    public ClassificationEventStream getClassificationEventStream() {
        // Already built on an earlier call: hand back the cached instance.
        if (this.classificationEventStream != null) {
            return classificationEventStream;
        }

        switch (this.getModule()) {
        case LanguageDetector:
            classificationEventStream = this.getLanguageDetectorService().getLanguageDetectorEventStream(
                    this.getLanguageCorpusReader(), this.getLanguageDetectorFeatures());
            break;
        case SentenceDetector:
            classificationEventStream = this.getSentenceDetectorService().getSentenceDetectorEventStream(
                    this.getSentenceCorpusReader(), this.getSentenceDetectorFeatures());
            break;
        case Tokeniser:
            if (patternTokeniserType != PatternTokeniserType.Interval) {
                Set<TokenPatternMatchFeature<?>> patternMatchFeatures = this.getTokenPatternMatchFeatures();
                classificationEventStream = this.getTokeniserPatternService().getCompoundPatternEventStream(
                        this.getTokenCorpusReader(), patternMatchFeatures, this.getTokeniserPatternManager());
            } else {
                Set<TokeniserContextFeature<?>> contextFeatures = this.getTokeniserContextFeatures();
                classificationEventStream = this.getTokeniserPatternService().getIntervalPatternEventStream(
                        this.getTokenCorpusReader(), contextFeatures, this.getTokeniserPatternManager());
            }
            break;
        case PosTagger:
            classificationEventStream = this.getPosTaggerService()
                    .getPosTagEventStream(this.getPosTagCorpusReader(), this.getPosTaggerFeatures());
            break;
        case Parser:
            classificationEventStream = this.getParserService()
                    .getParseEventStream(this.getParserCorpusReader(), this.getParserFeatures());
            break;
        default:
            throw new TalismaneException("Unsupported module: " + this.getModule());
        }

        return classificationEventStream;
    }

    /**
     * The pos-tagger to use for analysis, built and configured on the first call.
     * <p>
     * Configuration covers, in order: explicit features (only when a feature
     * path is set), token sequence filters as pre-processing filters, pos-tag
     * sequence filters as post-processing filters, pos-tagger rules, and a
     * detailed analysis observer when includeDetails is on.
     *
     * @return the configured pos-tagger
     * @throws RuntimeException wrapping any exception raised while building
     *         the pos-tagger, including the TalismaneException thrown when no
     *         pos-tagger model is available
     */
    @Override
    public PosTagger getPosTagger() {
        try {
            if (posTagger != null) {
                return posTagger;
            }

            LOG.debug("Getting pos-tagger model");

            ClassificationModel<PosTag> model = this.getPosTaggerModel();
            if (model == null) {
                throw new TalismaneException("No posTaggerModel provided");
            }

            posTagger = this.getPosTaggerService().getPosTagger(model, posTaggerBeamWidth);

            // Explicit features are only applied when a feature file was configured.
            if (posTaggerFeaturePath != null) {
                Set<PosTaggerFeature<?>> explicitFeatures = this.getPosTaggerFeatures();
                posTagger.setPosTaggerFeatures(explicitFeatures);
            }

            for (TokenSequenceFilter preFilter : this.getTokenSequenceFilters(model)) {
                posTagger.addPreProcessingFilter(preFilter);
            }

            for (PosTagSequenceFilter postFilter : this.getPosTagSequenceFilters(model)) {
                posTagger.addPostProcessingFilter(postFilter);
            }

            posTagger.setPosTaggerRules(this.getPosTaggerRules());

            if (includeDetails) {
                String detailsFileName = this.getBaseName() + "_posTagger_details.txt";
                File detailsOutput = new File(this.getOutDir(), detailsFileName);
                // Remove any details file left over from a previous run.
                detailsOutput.delete();
                ClassificationObserver<PosTag> detailsObserver = model.getDetailedAnalysisObserver(detailsOutput);
                posTagger.addObserver(detailsObserver);
            }

            return posTagger;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The parser to use for analysis, built and configured on the first call.
     * <p>
     * A transition-based parser is created from the parser model and then
     * configured with the analysis time limit, the minimum free memory,
     * explicit features (only when a feature path is set), parser rules, the
     * early-stop flag, an optional parse comparison strategy, and a detailed
     * analysis observer when includeDetails is on and the model is a
     * ClassificationModel. Finally the parser's transition system is
     * registered on the Talismane session.
     *
     * @return the configured parser
     * @throws RuntimeException wrapping any exception raised while building
     *         the parser, including the TalismaneException thrown when no
     *         parser model is available
     */
    @Override
    public Parser getParser() {
        try {
            if (parser == null) {
                LOG.debug("Getting parser model");
                MachineLearningModel parserModel = this.getParserModel();
                if (parserModel == null)
                    throw new TalismaneException("No parserModel provided");

                parser = this.getParserService().getTransitionBasedParser(parserModel, parserBeamWidth,
                        dynamiseFeatures);
                parser.setMaxAnalysisTimePerSentence(maxParseAnalysisTime);
                parser.setMinFreeMemory(minFreeMemory);

                // Explicit features are only applied when a feature file was configured.
                if (this.parserFeaturePath != null) {
                    Set<ParseConfigurationFeature<?>> parserFeatures = this.getParserFeatures();
                    parser.setParseFeatures(parserFeatures);
                }

                parser.setParserRules(this.getParserRules());

                if (parser instanceof TransitionBasedParser) {
                    TransitionBasedParser transitionBasedParser = (TransitionBasedParser) parser;
                    transitionBasedParser.setEarlyStop(earlyStop);
                }

                if (parseComparisonStrategyType != null) {
                    // Go through the accessor, like every other service lookup in this
                    // method, instead of reading the parserService field directly.
                    ParseComparisonStrategy parseComparisonStrategy = this.getParserService()
                            .getParseComparisonStrategy(parseComparisonStrategyType);
                    parser.setParseComparisonStrategy(parseComparisonStrategy);
                }

                if (includeDetails && parserModel instanceof ClassificationModel) {
                    String detailsFilePath = this.getBaseName() + "_parser_details.txt";
                    File detailsFile = new File(this.getOutDir(), detailsFilePath);
                    // Remove any details file left over from a previous run.
                    detailsFile.delete();
                    @SuppressWarnings("unchecked")
                    ClassificationModel<Transition> classificationModel = (ClassificationModel<Transition>) parserModel;
                    ClassificationObserver<Transition> observer = classificationModel
                            .getDetailedAnalysisObserver(detailsFile);
                    parser.addObserver(observer);
                }
                talismaneSession.setTransitionSystem(parser.getTransitionSystem());

            }
            return parser;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the parser features, loading them on first access from the file
     * at parserFeaturePath when that path is set.
     * <p>
     * The file is read line by line with the input charset; every line is kept
     * as one feature descriptor. The descriptors are also stored in the
     * descriptor map under MachineLearningModel.FEATURE_DESCRIPTOR_KEY.
     *
     * @return the parser features, or null if none were set and no feature
     *         path has been configured
     * @throws RuntimeException wrapping any exception raised while reading
     *         the file or building the features
     */
    @Override
    public Set<ParseConfigurationFeature<?>> getParserFeatures() {
        if (parserFeatures == null) {
            try {
                if (parserFeaturePath != null) {
                    LOG.debug("Found setting to change parser features");
                    File parserFeatureFile = this.getFile(parserFeaturePath);
                    List<String> featureDescriptors = new ArrayListNoNulls<String>();
                    Scanner scanner = new Scanner(new BufferedReader(
                            new InputStreamReader(new FileInputStream(parserFeatureFile), this.getInputCharset())));
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // Closing the scanner also closes the underlying file stream,
                        // which was previously left open.
                        scanner.close();
                    }
                    parserFeatures = this.getParserFeatureService().getFeatures(featureDescriptors);

                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return parserFeatures;
    }

    /**
     * Upper bound, in seconds, on the time the parser spends on a single sentence.
     * When the limit is exceeded the parser returns a partial analysis (a
     * "dependency forest") in which some nodes have no governor.<br/>
     * A value of 0 means no limit: the parser always runs until the sentence
     * analysis is complete.<br/>
     * The default value is 60.<br/>
     * @return the maximum analysis time per sentence, in seconds
     */
    @Override
    public int getMaxParseAnalysisTime() {
        return this.maxParseAnalysisTime;
    }

    /**
     * Sets the value returned by {@link #getMaxParseAnalysisTime()}.
     * The value is stored as given, without validation.
     *
     * @param maxParseAnalysisTime the new maximum analysis time
     */
    @Override
    public void setMaxParseAnalysisTime(int maxParseAnalysisTime) {
        // Stored as-is; getParser() reads this field when it builds the parser.
        this.maxParseAnalysisTime = maxParseAnalysisTime;
    }

    /**
     * A sentence processor to process sentences that have been read.
     * <p>
     * A template-based processor is created on the first call, but only when
     * the end module is the sentence detector. The template comes from the
     * file at templatePath when set, otherwise from the built-in resource
     * named by sentenceTemplateName.
     *
     * @return the sentence processor; null if none was set and the end module
     *         is not the sentence detector
     * @throws RuntimeException wrapping any exception raised while loading the template
     */
    @Override
    public SentenceProcessor getSentenceProcessor() {
        try {
            // Nothing to build: either already available, or not applicable to this end module.
            if (sentenceProcessor != null || !endModule.equals(Module.SentenceDetector)) {
                return sentenceProcessor;
            }

            // NOTE(review): both readers below use the platform default charset.
            Reader templateSource;
            if (templatePath != null) {
                templateSource = new BufferedReader(new FileReader(this.getFile(templatePath)));
            } else {
                templateSource = new BufferedReader(
                        new InputStreamReader(getInputStreamFromResource(sentenceTemplateName)));
            }

            sentenceProcessor = new FreemarkerTemplateWriter(templateSource);
            return sentenceProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * A token sequence processor to process token sequences that have been read.
     * <p>
     * A template-based processor is created on the first call, but only when
     * the end module is the tokeniser. The template comes from the file at
     * templatePath when set, otherwise from the built-in resource named by
     * tokeniserTemplateName.
     *
     * @return the token sequence processor; null if none was set and the end
     *         module is not the tokeniser
     * @throws RuntimeException wrapping any exception raised while loading the template
     */
    @Override
    public TokenSequenceProcessor getTokenSequenceProcessor() {
        try {
            // Nothing to build: either already available, or not applicable to this end module.
            if (tokenSequenceProcessor != null || !endModule.equals(Module.Tokeniser)) {
                return tokenSequenceProcessor;
            }

            // NOTE(review): both readers below use the platform default charset.
            Reader templateSource;
            if (templatePath != null) {
                templateSource = new BufferedReader(new FileReader(this.getFile(templatePath)));
            } else {
                templateSource = new BufferedReader(
                        new InputStreamReader(getInputStreamFromResource(tokeniserTemplateName)));
            }

            tokenSequenceProcessor = new FreemarkerTemplateWriter(templateSource);
            return tokenSequenceProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * A pos-tag sequence processor to process pos-tag sequences that have been read.
     * <p>
     * Created on the first call, but only when the end module is the
     * pos-tagger. With the posTagFeatureTester option, a feature tester writing
     * to "&lt;baseName&gt;_featureTest.txt" in the output directory is used;
     * otherwise a template-based processor is built from the file at
     * templatePath, or from the built-in resource named by
     * posTaggerTemplateName when no path is set.
     *
     * @return the pos-tag sequence processor; null if none was set and the end
     *         module is not the pos-tagger
     * @throws RuntimeException wrapping any exception raised during construction
     */
    @Override
    public PosTagSequenceProcessor getPosTagSequenceProcessor() {
        try {
            // Nothing to build: either already available, or not applicable to this end module.
            if (posTagSequenceProcessor != null || !endModule.equals(Module.PosTagger)) {
                return posTagSequenceProcessor;
            }

            if (this.option != Option.posTagFeatureTester) {
                // NOTE(review): both readers below use the platform default charset.
                Reader templateSource;
                if (templatePath != null) {
                    templateSource = new BufferedReader(new FileReader(this.getFile(templatePath)));
                } else {
                    templateSource = new BufferedReader(
                            new InputStreamReader(getInputStreamFromResource(posTaggerTemplateName)));
                }
                posTagSequenceProcessor = new FreemarkerTemplateWriter(templateSource);
            } else {
                File featureTestFile = new File(this.getOutDir(), this.getBaseName() + "_featureTest.txt");
                posTagSequenceProcessor = this.getPosTaggerService()
                        .getPosTagFeatureTester(this.getPosTaggerFeatures(), this.testWords, featureTestFile);
            }

            return posTagSequenceProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * A parse configuration processor to process parse configurations that have been read.
     * <p>
     * Created on the first call, but only when the end module is the parser.
     * The processor depends on the option:
     * <ul>
     * <li>no option: a template-based writer (template from templatePath, or
     * from the resource named by parserTemplateName)</li>
     * <li>loadParsingConstraints: a parsing constrainer bound to the file at outFilePath</li>
     * <li>parseFeatureTester: a feature tester writing to "&lt;baseName&gt;_featureTest.txt"</li>
     * </ul>
     * When includeTransitionLog is on, the processor is wrapped in a chain
     * that also writes "&lt;baseName&gt;_transitions.csv".
     *
     * @return the parse configuration processor; null if none was set and the
     *         end module is not the parser
     * @throws RuntimeException wrapping any exception raised during
     *         construction, including the TalismaneException for an unknown option
     */
    @Override
    public ParseConfigurationProcessor getParseConfigurationProcessor() {
        try {
            // Nothing to build: either already available, or not applicable to this end module.
            if (parseConfigurationProcessor != null || !endModule.equals(Module.Parser)) {
                return parseConfigurationProcessor;
            }

            if (option == null) {
                // NOTE(review): both readers below use the platform default charset.
                Reader templateSource;
                if (templatePath != null) {
                    templateSource = new BufferedReader(new FileReader(this.getFile(templatePath)));
                } else {
                    templateSource = new BufferedReader(
                            new InputStreamReader(getInputStreamFromResource(parserTemplateName)));
                }
                parseConfigurationProcessor = new FreemarkerTemplateWriter(templateSource);
            } else if (option.equals(Option.loadParsingConstraints)) {
                ParsingConstrainer parsingConstrainer = this.getParserService().getParsingConstrainer();
                // Return value deliberately ignored; presumably called for a side
                // effect of getOutDir() - TODO confirm.
                this.getOutDir();
                File constraintFile = this.getFile(outFilePath);
                parsingConstrainer.setFile(constraintFile);
                parseConfigurationProcessor = parsingConstrainer;
            } else if (option.equals(Option.parseFeatureTester)) {
                File featureTestFile = new File(this.getOutDir(), this.getBaseName() + "_featureTest.txt");
                parseConfigurationProcessor = this.getParserService()
                        .getParseFeatureTester(this.getParserFeatures(), featureTestFile);
            } else {
                throw new TalismaneException("Unknown option: " + option.toString());
            }

            if (includeTransitionLog) {
                // Chain the main processor with a transition log writer.
                ParseConfigurationProcessorChain processorChain = new ParseConfigurationProcessorChain();
                processorChain.addProcessor(parseConfigurationProcessor);

                File transitionLogFile = new File(this.getOutDir(), this.getBaseName() + "_transitions.csv");
                transitionLogFile.delete();
                transitionLogFile.createNewFile();
                Writer transitionLogFileWriter = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(transitionLogFile, false), "UTF8"));
                ParseConfigurationProcessor transitionLogProcessor = this.getParserService()
                        .getTransitionLogWriter(transitionLogFileWriter);

                processorChain.addProcessor(transitionLogProcessor);

                parseConfigurationProcessor = processorChain;
            }

            return parseConfigurationProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * A token corpus reader to read a corpus pre-annotated in tokens.
     * Note that in general, any filters up to and including the tokeniser should be applied to the corpus reader.
     * <p>
     * When no reader has been set, a regex-based reader over getReader() is
     * created; if sentenceReaderPath is set, a sentence reader over that file
     * (decoded with the input charset) is attached to it. On every call the
     * corpus reader attributes are re-applied and
     * addTokenCorpusReaderFilters is invoked.
     *
     * @return the token corpus reader
     * @throws RuntimeException wrapping the FileNotFoundException raised when
     *         the sentence reader file cannot be opened
     */
    @Override
    public TokeniserAnnotatedCorpusReader getTokenCorpusReader() {
        if (tokenCorpusReader == null) {
            TokenRegexBasedCorpusReader regexReader = this.getTokeniserService()
                    .getRegexBasedCorpusReader(this.getReader());
            if (this.getInputRegex() != null) {
                regexReader.setRegex(this.getInputRegex());
            }

            if (this.sentenceReaderPath != null) {
                try {
                    File sentenceFile = this.getFile(sentenceReaderPath);
                    Reader sentenceSource = new BufferedReader(
                            new InputStreamReader(new FileInputStream(sentenceFile), this.getInputCharset()));
                    SentenceDetectorAnnotatedCorpusReader sentenceCorpusReader = this.getSentenceDetectorService()
                            .getDefaultReader(sentenceSource);
                    regexReader.setSentenceReader(sentenceCorpusReader);
                } catch (FileNotFoundException fnfe) {
                    LogUtils.logError(LOG, fnfe);
                    throw new RuntimeException(fnfe);
                }
            }

            this.tokenCorpusReader = regexReader;
        }

        // Applied on every call, whether or not the reader was just created.
        this.setCorpusReaderAttributes(tokenCorpusReader);
        this.addTokenCorpusReaderFilters(tokenCorpusReader);

        return tokenCorpusReader;
    }

    /**
     * Returns the token corpus reader used for evaluation.
     * <p>
     * When no reader has been set, a regex-based reader over
     * getEvaluationReader() is created, using the evaluation regex when one is
     * available. On every call the corpus reader attributes are re-applied and
     * addTokenCorpusReaderFilters is invoked.
     *
     * @return the token evaluation corpus reader
     */
    @Override
    public TokeniserAnnotatedCorpusReader getTokenEvaluationCorpusReader() {
        if (tokenEvaluationCorpusReader == null) {
            TokenRegexBasedCorpusReader regexReader = this.getTokeniserService()
                    .getRegexBasedCorpusReader(this.getEvaluationReader());
            if (this.getEvaluationRegex() != null) {
                regexReader.setRegex(this.getEvaluationRegex());
            }
            this.tokenEvaluationCorpusReader = regexReader;
        }

        // Applied on every call, whether or not the reader was just created.
        this.setCorpusReaderAttributes(tokenEvaluationCorpusReader);
        this.addTokenCorpusReaderFilters(tokenEvaluationCorpusReader);

        return tokenEvaluationCorpusReader;
    }

    /**
     * Adds the token filters and token sequence filters to a token corpus reader.
     * <p>
     * The work is done only once per configuration: after the first call the
     * tokenCorpusReaderFiltersAdded flag is set and later calls are no-ops.
     * NOTE(review): the flag is not per reader, so a second, different reader
     * passed in later receives no filters - confirm this is intended.
     *
     * @param corpusReader the reader to which the filters are added
     */
    void addTokenCorpusReaderFilters(TokeniserAnnotatedCorpusReader corpusReader) {
        if (tokenCorpusReaderFiltersAdded) {
            return;
        }

        // The tokeniser model is only looked up when the command is not train.
        MachineLearningModel filterModel = null;
        if (command != Command.train) {
            filterModel = this.getTokeniserModel();
        }

        for (TokenFilter filter : this.getTokenFilters(filterModel)) {
            corpusReader.addTokenFilter(filter);
        }

        for (TokenSequenceFilter sequenceFilter : this.getTokenSequenceFilters(filterModel)) {
            corpusReader.addTokenSequenceFilter(sequenceFilter);
        }

        this.tokenCorpusReaderFiltersAdded = true;
    }

    /**
     * Replaces the token corpus reader returned by getTokenCorpusReader().
     * The tokenCorpusReaderFiltersAdded flag is not touched here.
     *
     * @param tokenCorpusReader the reader to use from now on
     */
    @Override
    public void setTokenCorpusReader(TokeniserAnnotatedCorpusReader tokenCorpusReader) {
        this.tokenCorpusReader = tokenCorpusReader;
    }

    /**
     * A pos tag corpus reader to read a corpus pre-annotated in postags.
     * Note that, in general, any filters up to and including the pos-tagger should be applied to the reader.
     * <p>
     * When no reader has been set, a regex-based reader over getReader() is
     * created, using the input regex when one is available. On every call the
     * corpus reader attributes are re-applied and
     * addPosTagCorpusReaderFilters is invoked.
     *
     * @return the pos-tag corpus reader
     */
    @Override
    public PosTagAnnotatedCorpusReader getPosTagCorpusReader() {
        if (posTagCorpusReader == null) {
            PosTagRegexBasedCorpusReader regexReader = this.getPosTaggerService()
                    .getRegexBasedCorpusReader(this.getReader());
            if (this.getInputRegex() != null) {
                regexReader.setRegex(this.getInputRegex());
            }
            posTagCorpusReader = regexReader;
        }

        // Applied on every call, whether or not the reader was just created.
        this.setCorpusReaderAttributes(posTagCorpusReader);
        this.addPosTagCorpusReaderFilters(posTagCorpusReader);

        return posTagCorpusReader;
    }

    /**
     * Returns the pos-tag corpus reader used for evaluation.
     * <p>
     * When no reader has been set, a regex-based reader over
     * getEvaluationReader() is created, using the evaluation regex when one is
     * available. addPosTagCorpusReaderFilters is invoked on every call; unlike
     * getTokenEvaluationCorpusReader, setCorpusReaderAttributes is not called here.
     *
     * @return the pos-tag evaluation corpus reader
     */
    @Override
    public PosTagAnnotatedCorpusReader getPosTagEvaluationCorpusReader() {
        if (posTagEvaluationCorpusReader == null) {
            PosTagRegexBasedCorpusReader regexReader = this.getPosTaggerService()
                    .getRegexBasedCorpusReader(this.getEvaluationReader());
            if (this.getEvaluationRegex() != null) {
                regexReader.setRegex(this.getEvaluationRegex());
            }
            this.posTagEvaluationCorpusReader = regexReader;
        }

        this.addPosTagCorpusReaderFilters(posTagEvaluationCorpusReader);

        return posTagEvaluationCorpusReader;
    }

    /**
     * Adds token filters (wrapped into a single token sequence filter), token
     * sequence filters and pos-tag sequence filters to a pos-tag corpus reader.
     * <p>
     * The work is done only once per configuration: after the first call the
     * posTagCorpusReaderFiltersAdded flag is set and later calls are no-ops.
     * NOTE(review): the flag is not per reader, so a second, different reader
     * passed in later receives no filters - confirm this is intended.
     *
     * @param corpusReader the reader to which the filters are added
     */
    void addPosTagCorpusReaderFilters(PosTagAnnotatedCorpusReader corpusReader) {
        if (posTagCorpusReaderFiltersAdded) {
            return;
        }

        // No model is looked up when training; otherwise the pos-tagger model is
        // used when starting at the tokeniser or pos-tagger, the parser model otherwise.
        MachineLearningModel filterModel = null;
        if (this.getCommand() != Command.train) {
            if (this.getStartModule().equals(Module.Tokeniser)
                    || this.getStartModule().equals(Module.PosTagger)) {
                filterModel = this.getPosTaggerModel();
            } else {
                filterModel = this.getParserModel();
            }
        }

        List<TokenFilter> collectedTokenFilters = new ArrayListNoNulls<TokenFilter>();
        for (TokenFilter filter : this.getTokenFilters(filterModel)) {
            collectedTokenFilters.add(filter);
        }

        // All token filters are applied through one wrapping token sequence filter.
        TokenSequenceFilter wrappedTokenFilters = this.getTokenFilterService()
                .getTokenSequenceFilter(collectedTokenFilters);
        corpusReader.addTokenSequenceFilter(wrappedTokenFilters);

        for (TokenSequenceFilter sequenceFilter : this.getTokenSequenceFilters(filterModel)) {
            corpusReader.addTokenSequenceFilter(sequenceFilter);
        }

        for (PosTagSequenceFilter posTagFilter : this.getPosTagSequenceFilters(filterModel)) {
            corpusReader.addPosTagSequenceFilter(posTagFilter);
        }

        posTagCorpusReaderFiltersAdded = true;
    }

    /**
     * A parser corpus reader to read a corpus pre-annotated in dependencies.
     * <p>
     * When no reader has been set, a regex-based reader over getReader() is
     * created and configured with the input regex (if any), the
     * predictTransitions flag and the excluded file name (if any). Its lexical
     * entry reader is built from the file at corpusLexicalEntryRegexPath when
     * that path is set, otherwise the implementation's default reader is used
     * if it provides one.
     * <p>
     * Corpus reader attributes and filters are applied once, tracked by the
     * parserCorpusReaderDecorated flag.
     *
     * @return the parser corpus reader
     * @throws TalismaneException if corpusLexicalEntryRegexPath is set but the
     *         file does not exist
     * @throws RuntimeException wrapping an UnsupportedEncodingException or
     *         FileNotFoundException raised while opening the regex file
     */
    @Override
    public ParserAnnotatedCorpusReader getParserCorpusReader() {
        try {
            if (parserCorpusReader == null) {
                ParserRegexBasedCorpusReader parserRegexCorpusReader = this.getParserService()
                        .getRegexBasedCorpusReader(this.getReader());
                if (this.getInputRegex() != null)
                    parserRegexCorpusReader.setRegex(this.getInputRegex());
                parserRegexCorpusReader.setPredictTransitions(predictTransitions);
                if (this.excludeFileName != null)
                    parserRegexCorpusReader.setExcludeFileName(this.excludeFileName);

                if (corpusLexicalEntryRegexPath != null) {
                    File corpusLexicalEntryRegexFile = this.getFile(corpusLexicalEntryRegexPath);
                    if (!corpusLexicalEntryRegexFile.exists())
                        throw new TalismaneException(
                                "corpusLexicalEntryRegex file not found: " + corpusLexicalEntryRegexPath);

                    Scanner corpusLexicalEntryRegexScanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(corpusLexicalEntryRegexFile), this.getInputCharset().name())));
                    LexicalEntryReader lexicalEntryReader;
                    try {
                        lexicalEntryReader = new RegexLexicalEntryReader(corpusLexicalEntryRegexScanner);
                    } finally {
                        // Close the scanner (and the file behind it) even if the
                        // reader's constructor fails; it used to leak in that case.
                        corpusLexicalEntryRegexScanner.close();
                    }
                    parserRegexCorpusReader.setLexicalEntryReader(lexicalEntryReader);
                } else {
                    LexicalEntryReader lexicalEntryReader = implementation.getDefaultCorpusLexicalEntryReader();
                    if (lexicalEntryReader != null) {
                        parserRegexCorpusReader.setLexicalEntryReader(lexicalEntryReader);
                    }
                }

                this.parserCorpusReader = parserRegexCorpusReader;
            }

            // Attributes and filters are applied a single time.
            if (!parserCorpusReaderDecorated) {
                this.setCorpusReaderAttributes(parserCorpusReader);
                this.addParserCorpusReaderFilters(parserCorpusReader);
                parserCorpusReaderDecorated = true;
            }

            return parserCorpusReader;
        } catch (UnsupportedEncodingException e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        } catch (FileNotFoundException e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Copies the corpus-related settings of this configuration onto a corpus reader.
     * <p>
     * The maximum sentence count and start sentence are always set. The
     * cross-validation size is only set when positive; the include and exclude
     * indexes are only set when non-negative.
     *
     * @param corpusReader the reader to configure
     */
    void setCorpusReaderAttributes(AnnotatedCorpusReader corpusReader) {
        corpusReader.setMaxSentenceCount(maxSentenceCount);
        corpusReader.setStartSentence(startSentence);

        if (crossValidationSize > 0) {
            corpusReader.setCrossValidationSize(crossValidationSize);
        }

        if (includeIndex >= 0) {
            corpusReader.setIncludeIndex(includeIndex);
        }

        if (excludeIndex >= 0) {
            corpusReader.setExcludeIndex(excludeIndex);
        }
    }

    /**
     * Returns the parser corpus reader used for evaluation.
     * <p>
     * When no reader has been set, a regex-based reader over
     * getEvaluationReader() is created, using the evaluation regex when one is
     * available and the predictTransitions flag. addParserCorpusReaderFilters
     * is invoked on every call; setCorpusReaderAttributes is not called here.
     *
     * @return the parser evaluation corpus reader
     */
    @Override
    public ParserAnnotatedCorpusReader getParserEvaluationCorpusReader() {
        if (parserEvaluationCorpusReader == null) {
            ParserRegexBasedCorpusReader regexReader = this.getParserService()
                    .getRegexBasedCorpusReader(this.getEvaluationReader());
            if (this.getEvaluationRegex() != null) {
                regexReader.setRegex(this.getEvaluationRegex());
            }
            regexReader.setPredictTransitions(predictTransitions);

            this.parserEvaluationCorpusReader = regexReader;
        }

        this.addParserCorpusReaderFilters(parserEvaluationCorpusReader);

        return parserEvaluationCorpusReader;
    }

    /**
     * Returns the configured evaluation file path.
     *
     * @return the value of the evaluationFilePath field, unchanged
     */
    @Override
    public String getEvaluationFilePath() {
        return this.evaluationFilePath;
    }

    /**
     * Replaces the parser corpus reader returned by getParserEvaluationCorpusReader().
     *
     * @param parserEvaluationCorpusReader the reader to use from now on
     */
    @Override
    public void setParserEvaluationCorpusReader(ParserAnnotatedCorpusReader parserEvaluationCorpusReader) {
        this.parserEvaluationCorpusReader = parserEvaluationCorpusReader;
    }

    /**
     * Replaces the pos-tag corpus reader returned by getPosTagEvaluationCorpusReader().
     *
     * @param posTagEvaluationCorpusReader the reader to use from now on
     */
    @Override
    public void setPosTagEvaluationCorpusReader(PosTagAnnotatedCorpusReader posTagEvaluationCorpusReader) {
        this.posTagEvaluationCorpusReader = posTagEvaluationCorpusReader;
    }

    /**
     * Adds token filters (wrapped into a single token sequence filter), token
     * sequence filters and pos-tag sequence filters to a parser corpus reader.
     * <p>
     * The work is done only once per configuration: after the first call the
     * parserCorpusReaderFiltersAdded flag is set and later calls are no-ops.
     * NOTE(review): the flag is not per reader, so a second, different reader
     * passed in later receives no filters - confirm this is intended.
     *
     * @param corpusReader the reader to which the filters are added
     */
    void addParserCorpusReaderFilters(ParserAnnotatedCorpusReader corpusReader) {
        if (parserCorpusReaderFiltersAdded) {
            return;
        }

        // No models are looked up when training. Otherwise the token-level filters
        // come from the pos-tagger model when starting at the tokeniser or pos-tagger,
        // and from the parser model otherwise; pos-tag sequence filters always come
        // from the parser model.
        MachineLearningModel tokenLevelModel = null;
        MachineLearningModel posTagLevelModel = null;
        if (this.getCommand() != Command.train) {
            if (this.getStartModule().equals(Module.Tokeniser)
                    || this.getStartModule().equals(Module.PosTagger)) {
                tokenLevelModel = this.getPosTaggerModel();
            } else {
                tokenLevelModel = this.getParserModel();
            }
            posTagLevelModel = this.getParserModel();
        }

        List<TokenFilter> collectedTokenFilters = new ArrayListNoNulls<TokenFilter>();
        for (TokenFilter filter : this.getTokenFilters(tokenLevelModel)) {
            collectedTokenFilters.add(filter);
        }

        // All token filters are applied through one wrapping token sequence filter.
        TokenSequenceFilter wrappedTokenFilters = this.getTokenFilterService()
                .getTokenSequenceFilter(collectedTokenFilters);
        corpusReader.addTokenSequenceFilter(wrappedTokenFilters);

        for (TokenSequenceFilter sequenceFilter : this.getTokenSequenceFilters(tokenLevelModel)) {
            corpusReader.addTokenSequenceFilter(sequenceFilter);
        }

        for (PosTagSequenceFilter posTagFilter : this.getPosTagSequenceFilters(posTagLevelModel)) {
            corpusReader.addPosTagSequenceFilter(posTagFilter);
        }

        parserCorpusReaderFiltersAdded = true;
    }

    /**
     * Replaces the pos-tag corpus reader returned by getPosTagCorpusReader().
     * The posTagCorpusReaderFiltersAdded flag is not touched here.
     *
     * @param posTagCorpusReader the reader to use from now on
     */
    @Override
    public void setPosTagCorpusReader(PosTagAnnotatedCorpusReader posTagCorpusReader) {
        this.posTagCorpusReader = posTagCorpusReader;
    }

    /**
     * Replaces the parser corpus reader returned by getParserCorpusReader().
     * The parserCorpusReaderDecorated and parserCorpusReaderFiltersAdded flags
     * are not touched here.
     *
     * @param parserCorpusReader the reader to use from now on
     */
    @Override
    public void setParserCorpusReader(ParserAnnotatedCorpusReader parserCorpusReader) {
        this.parserCorpusReader = parserCorpusReader;
    }

    /**
     * Get a parser evaluator if command=evaluate and endModule=parser.
     * <p>
     * The evaluator is built on the first call and reused afterwards. It is
     * given the tokeniser and/or pos-tagger depending on the start module, the
     * parser, and a series of observers: an f-score calculator (always), a
     * sentence writer (when outputGuesses is on), an f-score-by-distance
     * calculator (when includeDistanceFScores is on), a transition log observer
     * (when includeTransitionLog is on) and a template writer (always).
     * All output files are created in the output directory and are named after
     * the base name.
     *
     * @return the configured parser evaluator
     * @throws RuntimeException wrapping any exception raised while building the evaluator
     */
    @Override
    public ParserEvaluator getParserEvaluator() {
        try {
            if (parserEvaluator == null) {
                parserEvaluator = this.getParserService().getParserEvaluator();
                // Only the modules from the start module onwards are attached to the evaluator.
                if (startModule.equals(Module.Tokeniser)) {
                    parserEvaluator.setTokeniser(this.getTokeniser());
                    parserEvaluator.setPosTagger(this.getPosTagger());
                } else if (startModule.equals(Module.PosTagger)) {
                    parserEvaluator.setPosTagger(this.getPosTagger());
                }
                parserEvaluator.setParser(this.getParser());

                // Observer 1 (always): f-scores written to <baseName>.fscores.csv.
                File fscoreFile = new File(this.getOutDir(), this.getBaseName() + ".fscores.csv");

                ParseEvaluationFScoreCalculator parseFScoreCalculator = new ParseEvaluationFScoreCalculator(
                        fscoreFile);
                parseFScoreCalculator.setLabeledEvaluation(this.labeledEvaluation);
                parseFScoreCalculator.setSkipLabel(skipLabel);

                if (parserEvaluator.getTokeniser() != null)
                    parseFScoreCalculator.setHasTokeniser(true);
                if (parserEvaluator.getPosTagger() != null)
                    parseFScoreCalculator.setHasPosTagger(true);
                parserEvaluator.addObserver(parseFScoreCalculator);

                // Observer 2 (optional): guesses per sentence written to <baseName>_sentences.csv.
                if (outputGuesses) {
                    File csvFile = new File(this.getOutDir(), this.getBaseName() + "_sentences.csv");
                    csvFile.delete();
                    csvFile.createNewFile();
                    Writer csvFileWriter = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8"));
                    // Use the configured guess count when positive, otherwise the parser's beam width.
                    int guessCount = 1;
                    if (outputGuessCount > 0)
                        guessCount = outputGuessCount;
                    else
                        guessCount = this.getParser().getBeamWidth();

                    ParseEvaluationSentenceWriter sentenceWriter = new ParseEvaluationSentenceWriter(csvFileWriter,
                            guessCount);
                    if (parserEvaluator.getTokeniser() != null)
                        sentenceWriter.setHasTokeniser(true);
                    if (parserEvaluator.getPosTagger() != null)
                        sentenceWriter.setHasPosTagger(true);
                    parserEvaluator.addObserver(sentenceWriter);
                }

                // Observer 3 (optional): f-scores by distance written to <baseName>_distances.csv.
                if (includeDistanceFScores) {
                    File csvFile = new File(this.getOutDir(), this.getBaseName() + "_distances.csv");
                    csvFile.delete();
                    csvFile.createNewFile();
                    Writer csvFileWriter = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8"));
                    ParserFScoreCalculatorByDistance calculator = new ParserFScoreCalculatorByDistance(
                            csvFileWriter);
                    calculator.setLabeledEvaluation(this.labeledEvaluation);
                    calculator.setSkipLabel(skipLabel);
                    parserEvaluator.addObserver(calculator);
                }

                // Observer 4 (optional): transition log written to <baseName>_transitions.csv.
                if (includeTransitionLog) {
                    File csvFile = new File(this.getOutDir(), this.getBaseName() + "_transitions.csv");
                    csvFile.delete();
                    csvFile.createNewFile();
                    Writer csvFileWriter = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8"));
                    ParseConfigurationProcessor transitionLogWriter = this.getParserService()
                            .getTransitionLogWriter(csvFileWriter);
                    ParseEvaluationObserverImpl observer = new ParseEvaluationObserverImpl(transitionLogWriter);
                    observer.setWriter(csvFileWriter);
                    if (this.errorLabels != null)
                        observer.setErrorLabels(errorLabels);
                    parserEvaluator.addObserver(observer);
                }

                // Observer 5 (always): template output written to <baseName>_output.txt.
                // The template comes from templatePath when set, otherwise from the
                // built-in parser template resource.
                // NOTE(review): both template readers use the platform default charset.
                Reader templateReader = null;
                if (templatePath == null) {
                    templateReader = new BufferedReader(
                            new InputStreamReader(getInputStreamFromResource(parserTemplateName)));
                } else {
                    templateReader = new BufferedReader(new FileReader(this.getFile(templatePath)));
                }

                File freemarkerFile = new File(this.getOutDir(), this.getBaseName() + "_output.txt");
                freemarkerFile.delete();
                freemarkerFile.createNewFile();
                Writer freemakerFileWriter = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(freemarkerFile, false), "UTF8"));
                ParseEvaluationGuessTemplateWriter templateWriter = new ParseEvaluationGuessTemplateWriter(
                        freemakerFileWriter, templateReader);
                parserEvaluator.addObserver(templateWriter);
                parserEvaluator.setSentenceCount(maxSentenceCount);
                parserEvaluator.setPropagateBeam(propagateBeam);
            }

            return parserEvaluator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a parser comparator if command=compare and endModule=parser.
     * <p>
     * The comparator is built on the first call and cached. It is obtained from the parser service,
     * given an f-score observer writing to "[baseName].fscores.csv" and, when distance f-scores are
     * included, a by-distance observer writing (in UTF-8, overwriting any previous file) to
     * "[baseName].distances.csv". Its sentence count is set from maxSentenceCount.
     * @return the cached parse comparator
     * @throws RuntimeException wrapping any exception raised while building the comparator
     */
    @Override
    public ParseComparator getParseComparator() {
        if (parseComparator != null)
            return parseComparator;

        try {
            parseComparator = this.getParserService().getParseComparator();

            // overall f-scores
            ParseEvaluationFScoreCalculator fScoreObserver = new ParseEvaluationFScoreCalculator(
                    new File(this.getOutDir(), this.getBaseName() + ".fscores.csv"));
            fScoreObserver.setLabeledEvaluation(this.labeledEvaluation);
            fScoreObserver.setSkipLabel(skipLabel);
            parseComparator.addObserver(fScoreObserver);

            // optional f-scores broken down by distance
            if (includeDistanceFScores) {
                File distanceFile = new File(this.getOutDir(), this.getBaseName() + ".distances.csv");
                distanceFile.delete();
                distanceFile.createNewFile();
                FileOutputStream distanceStream = new FileOutputStream(distanceFile, false);
                Writer distanceWriter = new BufferedWriter(new OutputStreamWriter(distanceStream, "UTF8"));

                ParserFScoreCalculatorByDistance byDistanceObserver = new ParserFScoreCalculatorByDistance(
                        distanceWriter);
                byDistanceObserver.setLabeledEvaluation(this.labeledEvaluation);
                byDistanceObserver.setSkipLabel(skipLabel);
                parseComparator.addObserver(byDistanceObserver);
            }

            parseComparator.setSentenceCount(maxSentenceCount);
            return parseComparator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a tokeniser evaluator if command=evaluate and endModule=tokeniser.
     * <p>
     * Created on the first call from the tokeniser service for the current tokeniser,
     * with all token evaluation observers attached and its sentence count set from maxSentenceCount.
     * @return the cached tokeniser evaluator
     */
    @Override
    public TokeniserEvaluator getTokeniserEvaluator() {
        if (tokeniserEvaluator != null)
            return tokeniserEvaluator;

        tokeniserEvaluator = this.getTokeniserService().getTokeniserEvaluator(this.getTokeniser());

        for (TokenEvaluationObserver tokenObserver : this.getTokenEvaluationObservers()) {
            tokeniserEvaluator.addObserver(tokenObserver);
        }

        tokeniserEvaluator.setSentenceCount(maxSentenceCount);
        return tokeniserEvaluator;
    }

    /**
     * Get a sentence detector evaluator if command=evaluate and endModule=sentenceDetector.
     * <p>
     * Created on the first call from the sentence detector service for the current sentence detector.
     * @return the cached sentence detector evaluator
     */
    @Override
    public SentenceDetectorEvaluator getSentenceDetectorEvaluator() {
        if (sentenceDetectorEvaluator != null)
            return sentenceDetectorEvaluator;

        sentenceDetectorEvaluator = this.getSentenceDetectorService().getEvaluator(this.getSentenceDetector());
        return sentenceDetectorEvaluator;
    }

    /**
     * Builds a fresh list of observers for token evaluation and comparison.
     * <p>
     * Three observers are created, each writing to files in the output directory whose names start
     * with the base name (files opened here are recreated empty and written in UTF-8):
     * <ul>
     * <li>an f-score calculator, writing to ".errorList.txt", ".errors.csv" and ".fscores.csv"</li>
     * <li>a corpus writer, writing to ".corpus.txt"</li>
     * <li>a template writer, writing to "_output.txt" using either the template found at templatePath
     * or, if none was given, the default tokeniser template resource</li>
     * </ul>
     * @return the new observers
     * @throws RuntimeException wrapping any {@link IOException}
     */
    private List<TokenEvaluationObserver> getTokenEvaluationObservers() {
        try {
            List<TokenEvaluationObserver> evaluationObservers = new ArrayListNoNulls<TokenEvaluationObserver>();

            // plain-text error list
            File errorListFile = new File(this.getOutDir(), this.getBaseName() + ".errorList.txt");
            errorListFile.delete();
            errorListFile.createNewFile();
            FileOutputStream errorListStream = new FileOutputStream(errorListFile, false);
            Writer errorListWriter = new BufferedWriter(new OutputStreamWriter(errorListStream, "UTF8"));

            // CSV error list
            File errorCsvFile = new File(this.getOutDir(), this.getBaseName() + ".errors.csv");
            errorCsvFile.delete();
            errorCsvFile.createNewFile();
            FileOutputStream errorCsvStream = new FileOutputStream(errorCsvFile, false);
            Writer errorCsvWriter = new BufferedWriter(new OutputStreamWriter(errorCsvStream, "UTF8"));

            TokenEvaluationFScoreCalculator fScoreObserver = new TokenEvaluationFScoreCalculator();
            fScoreObserver.setErrorWriter(errorListWriter);
            fScoreObserver.setCsvErrorWriter(errorCsvWriter);
            fScoreObserver.setFScoreFile(new File(this.getOutDir(), this.getBaseName() + ".fscores.csv"));
            evaluationObservers.add(fScoreObserver);

            // corpus output
            File corpusOutFile = new File(this.getOutDir(), this.getBaseName() + ".corpus.txt");
            corpusOutFile.delete();
            corpusOutFile.createNewFile();
            FileOutputStream corpusOutStream = new FileOutputStream(corpusOutFile, false);
            Writer corpusOutWriter = new BufferedWriter(new OutputStreamWriter(corpusOutStream, "UTF8"));
            evaluationObservers.add(new TokenEvaluationCorpusWriter(corpusOutWriter));

            // template-driven output: a user-supplied template takes precedence over the built-in one
            Reader templateSource;
            if (templatePath != null) {
                templateSource = new BufferedReader(new FileReader(this.getFile(templatePath)));
            } else {
                templateSource = new BufferedReader(
                        new InputStreamReader(getInputStreamFromResource(tokeniserTemplateName)));
            }

            File templateOutFile = new File(this.getOutDir(), this.getBaseName() + "_output.txt");
            templateOutFile.delete();
            templateOutFile.createNewFile();
            FileOutputStream templateOutStream = new FileOutputStream(templateOutFile, false);
            Writer templateOutWriter = new BufferedWriter(new OutputStreamWriter(templateOutStream, "UTF8"));
            evaluationObservers.add(new TokeniserGuessTemplateWriter(templateOutWriter, templateSource));

            return evaluationObservers;
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a token comparator if command=compare.
     * NOTE(review): the previous comment named endModule=parser here; since this compares tokens,
     * endModule=tokeniser is presumably what was meant - confirm.
     * <p>
     * Created on the first call from the tokeniser service, using the token corpus reader, the token
     * evaluation corpus reader and, if the current tokeniser is a pattern tokeniser, its pattern
     * manager (null otherwise). All token evaluation observers are attached and the sentence count
     * is set from maxSentenceCount.
     * @return the cached token comparator
     * @throws RuntimeException wrapping any exception raised while building the comparator
     */
    @Override
    public TokenComparator getTokenComparator() {
        if (tokenComparator != null)
            return tokenComparator;

        try {
            Tokeniser currentTokeniser = this.getTokeniser();
            TokeniserPatternManager patternManager = currentTokeniser instanceof PatternTokeniser
                    ? ((PatternTokeniser) currentTokeniser).getTokeniserPatternManager()
                    : null;

            tokenComparator = this.getTokeniserService().getTokenComparator(this.getTokenCorpusReader(),
                    this.getTokenEvaluationCorpusReader(), patternManager);

            for (TokenEvaluationObserver tokenObserver : this.getTokenEvaluationObservers()) {
                tokenComparator.addObserver(tokenObserver);
            }

            tokenComparator.setSentenceCount(maxSentenceCount);
            return tokenComparator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a pos-tagger evaluator if command=evaluate and endModule=posTagger.
     * <p>
     * Created on the first call and cached. The evaluator is given the tokeniser when processing starts
     * at the tokeniser, and the following observers, all writing to files in the output directory whose
     * names start with the base name:
     * <ul>
     * <li>if guesses are output: a sentence writer on "_sentences.csv", with as many guesses as
     * outputGuessCount if positive, else the beam width of a non-deterministic pos-tagger, else 1</li>
     * <li>an f-score calculator on "_fscores.csv", plus "_unknown.csv" and "_known.csv" if unknown word
     * results are included</li>
     * <li>a template writer on "_output.txt", using the template at templatePath or, if none was given,
     * the default pos-tagger template resource</li>
     * <li>if lexicon coverage is included: a lexical coverage tester on ".unknown.csv"</li>
     * </ul>
     * @return the cached pos-tagger evaluator
     * @throws RuntimeException wrapping any exception raised while building the evaluator
     */
    @Override
    public PosTaggerEvaluator getPosTaggerEvaluator() {
        if (posTaggerEvaluator != null)
            return posTaggerEvaluator;

        try {
            posTaggerEvaluator = this.getPosTaggerService().getPosTaggerEvaluator(this.getPosTagger());

            if (startModule.equals(Module.Tokeniser))
                posTaggerEvaluator.setTokeniser(this.getTokeniser());

            if (outputGuesses) {
                File sentenceFile = new File(this.getOutDir(), this.getBaseName() + "_sentences.csv");
                sentenceFile.delete();
                sentenceFile.createNewFile();
                FileOutputStream sentenceStream = new FileOutputStream(sentenceFile, false);
                Writer sentenceFileWriter = new BufferedWriter(new OutputStreamWriter(sentenceStream, "UTF8"));

                // an explicit guess count wins; otherwise fall back on the beam width, if there is one
                int guessesToOutput;
                if (outputGuessCount > 0) {
                    guessesToOutput = outputGuessCount;
                } else if (this.getPosTagger() instanceof NonDeterministicPosTagger) {
                    guessesToOutput = ((NonDeterministicPosTagger) this.getPosTagger()).getBeamWidth();
                } else {
                    guessesToOutput = 1;
                }

                PosTagEvaluationSentenceWriter guessObserver = new PosTagEvaluationSentenceWriter(
                        sentenceFileWriter, guessesToOutput);
                posTaggerEvaluator.addObserver(guessObserver);
            }

            PosTagEvaluationFScoreCalculator fScoreObserver = new PosTagEvaluationFScoreCalculator(
                    new File(this.getOutDir(), this.getBaseName() + "_fscores.csv"));
            if (includeUnknownWordResults) {
                fScoreObserver.setFScoreUnknownInLexiconFile(
                        new File(this.getOutDir(), this.getBaseName() + "_unknown.csv"));
                fScoreObserver.setFScoreKnownInLexiconFile(
                        new File(this.getOutDir(), this.getBaseName() + "_known.csv"));
            }
            posTaggerEvaluator.addObserver(fScoreObserver);

            // a user-supplied template takes precedence over the built-in one
            Reader templateSource;
            if (templatePath != null) {
                templateSource = new BufferedReader(new FileReader(this.getFile(templatePath)));
            } else {
                templateSource = new BufferedReader(
                        new InputStreamReader(getInputStreamFromResource(posTaggerTemplateName)));
            }

            File templateOutFile = new File(this.getOutDir(), this.getBaseName() + "_output.txt");
            templateOutFile.delete();
            templateOutFile.createNewFile();
            FileOutputStream templateOutStream = new FileOutputStream(templateOutFile, false);
            Writer templateOutWriter = new BufferedWriter(new OutputStreamWriter(templateOutStream, "UTF8"));
            PosTaggerGuessTemplateWriter templateObserver = new PosTaggerGuessTemplateWriter(templateOutWriter,
                    templateSource);
            posTaggerEvaluator.addObserver(templateObserver);

            if (includeLexiconCoverage) {
                PosTagEvaluationLexicalCoverageTester coverageObserver = new PosTagEvaluationLexicalCoverageTester(
                        new File(this.getOutDir(), this.getBaseName() + ".unknown.csv"));
                posTaggerEvaluator.addObserver(coverageObserver);
            }

            posTaggerEvaluator.setPropagateBeam(propagateBeam);
            posTaggerEvaluator.setSentenceCount(maxSentenceCount);
            return posTaggerEvaluator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a pos-tag comparator if command=compare.
     * NOTE(review): the previous comment named endModule=parser here; since this compares pos-tags,
     * endModule=posTagger is presumably what was meant - confirm.
     * <p>
     * Created on the first call from the pos-tagger service, with an f-score observer writing to
     * "[baseName].fscores.csv" and its sentence count set from maxSentenceCount.
     * @return the cached pos-tag comparator
     * @throws RuntimeException wrapping any exception raised while building the comparator
     */
    @Override
    public PosTagComparator getPosTagComparator() {
        if (posTagComparator != null)
            return posTagComparator;

        try {
            posTagComparator = this.getPosTaggerService().getPosTagComparator();

            PosTagEvaluationFScoreCalculator fScoreObserver = new PosTagEvaluationFScoreCalculator(
                    new File(this.getOutDir(), this.getBaseName() + ".fscores.csv"));
            posTagComparator.addObserver(fScoreObserver);

            posTagComparator.setSentenceCount(maxSentenceCount);
            return posTagComparator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The base name, out of which to construct output file names.
     * <p>
     * Computed on the first call and cached. It is derived from the first applicable path among:
     * the output file path, the input file path, then the model file path of the language detector,
     * sentence detector, tokeniser, pos-tagger or parser (a model path only applies when it is set
     * and the corresponding module is the current module or the end module). The directory part and
     * the file extension are dropped from that path. If no path applies, "Talismane" is used.
     * The configured suffix is always appended.
     * @return the base name, including the suffix
     */
    @Override
    public String getBaseName() {
        if (baseName == null) {
            String sourcePath = null;
            if (outFilePath != null) {
                sourcePath = outFilePath;
            } else if (inFilePath != null) {
                sourcePath = inFilePath;
            } else if (languageModelFilePath != null
                    && (Talismane.Module.LanguageDetector.equals(module)
                            || Talismane.Module.LanguageDetector.equals(endModule))) {
                // the parentheses matter: without them a null model path was dereferenced
                // whenever the end module matched
                sourcePath = languageModelFilePath;
            } else if (sentenceModelFilePath != null
                    && (Talismane.Module.SentenceDetector.equals(module)
                            || Talismane.Module.SentenceDetector.equals(endModule))) {
                sourcePath = sentenceModelFilePath;
            } else if (tokeniserModelFilePath != null
                    && (Talismane.Module.Tokeniser.equals(module)
                            || Talismane.Module.Tokeniser.equals(endModule))) {
                sourcePath = tokeniserModelFilePath;
            } else if (posTaggerModelFilePath != null
                    && (Talismane.Module.PosTagger.equals(module)
                            || Talismane.Module.PosTagger.equals(endModule))) {
                sourcePath = posTaggerModelFilePath;
            } else if (parserModelFilePath != null
                    && (Talismane.Module.Parser.equals(module) || Talismane.Module.Parser.equals(endModule))) {
                sourcePath = parserModelFilePath;
            }

            String stem = sourcePath == null ? "Talismane" : stripDirectoryAndExtension(sourcePath);
            baseName = stem + suffix;
        }
        return baseName;
    }

    /**
     * Returns the file name part of a '/'-separated path, without its extension.
     * The extension is looked for in the file name only, so that a dot in a directory name
     * (e.g. "./out/file" or "models.v2/file") is never mistaken for an extension separator.
     * @param path the path to reduce, not null
     * @return the file name up to (but excluding) its last dot, or the whole file name
     *         if it has no dot or only a leading one
     */
    private static String stripDirectoryAndExtension(String path) {
        String fileName = path.substring(path.lastIndexOf('/') + 1);
        int extensionStart = fileName.lastIndexOf('.');
        // a leading dot marks a hidden file rather than an extension
        return extensionStart > 0 ? fileName.substring(0, extensionStart) : fileName;
    }

    /**
     * Returns the pos-tagger service held by this configuration.
     * @return the pos-tagger service, as last set
     */
    @Override
    public PosTaggerService getPosTaggerService() {
        return this.posTaggerService;
    }

    /**
     * Sets the pos-tagger service held by this configuration.
     * @param posTaggerService the service to store
     */
    public void setPosTaggerService(PosTaggerService posTaggerService) {
        this.posTaggerService = posTaggerService;
    }

    /**
     * Returns the parser service held by this configuration.
     * @return the parser service, as last set
     */
    @Override
    public ParserService getParserService() {
        return this.parserService;
    }

    /**
     * Sets the parser service held by this configuration.
     * @param parserService the service to store
     */
    public void setParserService(ParserService parserService) {
        this.parserService = parserService;
    }

    /**
     * Returns the pos-tagger feature service held by this configuration.
     * @return the pos-tagger feature service, as last set
     */
    public PosTaggerFeatureService getPosTaggerFeatureService() {
        return this.posTaggerFeatureService;
    }

    /**
     * Sets the pos-tagger feature service held by this configuration.
     * @param posTaggerFeatureService the service to store
     */
    public void setPosTaggerFeatureService(PosTaggerFeatureService posTaggerFeatureService) {
        this.posTaggerFeatureService = posTaggerFeatureService;
    }

    /**
     * Returns the parser feature service held by this configuration.
     * @return the parser feature service, as last set
     */
    public ParserFeatureService getParserFeatureService() {
        return this.parserFeatureService;
    }

    /**
     * Sets the parser feature service held by this configuration.
     * @param parserFeatureService the service to store
     */
    public void setParserFeatureService(ParserFeatureService parserFeatureService) {
        this.parserFeatureService = parserFeatureService;
    }

    /**
     * Returns the filter service held by this configuration.
     * @return the filter service, as last set
     */
    @Override
    public FilterService getFilterService() {
        return this.filterService;
    }

    /**
     * Sets the filter service held by this configuration.
     * @param filterService the service to store
     */
    public void setFilterService(FilterService filterService) {
        this.filterService = filterService;
    }

    /**
     * Returns the token filter service held by this configuration.
     * @return the token filter service, as last set
     */
    @Override
    public TokenFilterService getTokenFilterService() {
        return this.tokenFilterService;
    }

    /**
     * Returns the pos-tag filter service held by this configuration (internal use only).
     * @return the current value of the posTagFilterService field
     */
    private PosTagFilterService getPosTagFilterService() {
        return this.posTagFilterService;
    }

    /**
     * Sets the token filter service held by this configuration.
     * @param tokenFilterService the service to store
     */
    public void setTokenFilterService(TokenFilterService tokenFilterService) {
        this.tokenFilterService = tokenFilterService;
    }

    /**
     * Returns the sentence detector service held by this configuration.
     * @return the sentence detector service, as last set
     */
    public SentenceDetectorService getSentenceDetectorService() {
        return this.sentenceDetectorService;
    }

    /**
     * Sets the sentence detector service held by this configuration.
     * @param sentenceDetectorService the service to store
     */
    public void setSentenceDetectorService(SentenceDetectorService sentenceDetectorService) {
        this.sentenceDetectorService = sentenceDetectorService;
    }

    /**
     * Returns the language detector service held by this configuration.
     * @return the language detector service, as last set
     */
    public LanguageDetectorService getLanguageDetectorService() {
        return this.languageDetectorService;
    }

    /**
     * Sets the language detector service held by this configuration.
     * @param languageDetectorService the service to store
     */
    public void setLanguageDetectorService(LanguageDetectorService languageDetectorService) {
        this.languageDetectorService = languageDetectorService;
    }

    /**
     * Returns the sentence detector feature service held by this configuration.
     * @return the sentence detector feature service, as last set
     */
    public SentenceDetectorFeatureService getSentenceDetectorFeatureService() {
        return this.sentenceDetectorFeatureService;
    }

    /**
     * Sets the sentence detector feature service held by this configuration.
     * @param sentenceDetectorFeatureService the service to store
     */
    public void setSentenceDetectorFeatureService(SentenceDetectorFeatureService sentenceDetectorFeatureService) {
        this.sentenceDetectorFeatureService = sentenceDetectorFeatureService;
    }

    /**
     * Returns the machine learning service held by this configuration.
     * @return the machine learning service, as last set
     */
    public MachineLearningService getMachineLearningService() {
        return this.machineLearningService;
    }

    /**
     * Sets the machine learning service held by this configuration.
     * @param machineLearningService the service to store
     */
    public void setMachineLearningService(MachineLearningService machineLearningService) {
        this.machineLearningService = machineLearningService;
    }

    /**
     * Returns the tokeniser pattern service held by this configuration.
     * @return the tokeniser pattern service, as last set
     */
    public TokeniserPatternService getTokeniserPatternService() {
        return this.tokeniserPatternService;
    }

    /**
     * Sets the tokeniser pattern service held by this configuration.
     * @param tokeniserPatternService the service to store
     */
    public void setTokeniserPatternService(TokeniserPatternService tokeniserPatternService) {
        this.tokeniserPatternService = tokeniserPatternService;
    }

    /**
     * Returns the token feature service held by this configuration.
     * @return the token feature service, as last set
     */
    public TokenFeatureService getTokenFeatureService() {
        return this.tokenFeatureService;
    }

    /**
     * Sets the token feature service held by this configuration.
     * @param tokenFeatureService the service to store
     */
    public void setTokenFeatureService(TokenFeatureService tokenFeatureService) {
        this.tokenFeatureService = tokenFeatureService;
    }

    /**
     * Returns the tokeniser service held by this configuration.
     * @return the tokeniser service, as last set
     */
    @Override
    public TokeniserService getTokeniserService() {
        return this.tokeniserService;
    }

    /**
     * Sets the tokeniser service held by this configuration.
     * @param tokeniserService the service to store
     */
    public void setTokeniserService(TokeniserService tokeniserService) {
        this.tokeniserService = tokeniserService;
    }

    /**
     * Returns the Talismane service through its internal interface.
     * @return the current value of the talismaneService field
     */
    TalismaneServiceInternal getTalismaneServiceInternal() {
        // The public getter is invoked first and its result ignored.
        // NOTE(review): presumably kept so that an overriding getTalismaneService() runs before
        // the field is read - confirm before removing.
        this.getTalismaneService();
        return this.talismaneService;
    }

    /**
     * Returns the Talismane service held by this configuration.
     * @return the Talismane service, as last set
     */
    @Override
    public TalismaneService getTalismaneService() {
        return this.talismaneService;
    }

    /**
     * Sets the Talismane service, and takes over its session.
     * <p>
     * The service must also implement {@link TalismaneServiceInternal}. The session obtained from
     * the service is stored on this configuration and given this configuration's language implementation.
     * @param talismaneService the service to store
     * @throws ClassCastException if the service is not a {@link TalismaneServiceInternal}
     */
    public void setTalismaneService(TalismaneService talismaneService) {
        TalismaneServiceInternal internalService = (TalismaneServiceInternal) talismaneService;
        this.talismaneService = internalService;

        this.talismaneSession = talismaneService.getTalismaneSession();
        this.talismaneSession.setImplementation(this.implementation);
    }

    /**
     * Does this instance of Talismane need a sentence detector to perform the requested processing.
     * @return true if the sentence detector is neither before the start module nor after the end module
     */
    @Override
    public boolean needsSentenceDetector() {
        final boolean startsAtOrBefore = startModule.compareTo(Module.SentenceDetector) <= 0;
        if (!startsAtOrBefore)
            return false;
        return endModule.compareTo(Module.SentenceDetector) >= 0;
    }

    /**
     * Does this instance of Talismane need a tokeniser to perform the requested processing.
     * @return true if the tokeniser is neither before the start module nor after the end module
     */
    @Override
    public boolean needsTokeniser() {
        final boolean startsAtOrBefore = startModule.compareTo(Module.Tokeniser) <= 0;
        if (!startsAtOrBefore)
            return false;
        return endModule.compareTo(Module.Tokeniser) >= 0;
    }

    /**
     * Does this instance of Talismane need a pos tagger to perform the requested processing.
     * @return true if the pos tagger is neither before the start module nor after the end module
     */
    @Override
    public boolean needsPosTagger() {
        final boolean startsAtOrBefore = startModule.compareTo(Module.PosTagger) <= 0;
        if (!startsAtOrBefore)
            return false;
        return endModule.compareTo(Module.PosTagger) >= 0;
    }

    /**
     * Does this instance of Talismane need a parser to perform the requested processing.
     * @return true if the parser is neither before the start module nor after the end module
     */
    @Override
    public boolean needsParser() {
        final boolean startsAtOrBefore = startModule.compareTo(Module.Parser) <= 0;
        if (!startsAtOrBefore)
            return false;
        return endModule.compareTo(Module.Parser) >= 0;
    }

    /**
     * Opens a resource from the "/com/joliciel/talismane/output/" classpath folder,
     * resolved relative to the {@link Talismane} class.
     * @param resource the resource name inside that folder
     * @return the stream returned by {@link Class#getResourceAsStream(String)},
     *         which is null when no such resource is found
     */
    private static InputStream getInputStreamFromResource(String resource) {
        return Talismane.class.getResourceAsStream("/com/joliciel/talismane/output/" + resource);
    }

    /**
     * Returns the input file path held by this configuration.
     * @return the current value of the inFilePath field
     */
    @Override
    public String getInFilePath() {
        return this.inFilePath;
    }

    /**
     * Returns the logStats setting of this configuration.
     * @return the current value of the logStats field
     */
    @Override
    public boolean isLogStats() {
        return this.logStats;
    }

    /**
     * Returns the annotated corpus reader used for language detection, building it on the first call.
     * <p>
     * The reader is built from the language corpus map file, read in the input charset: each non-blank
     * line holds a language tag, a tab, and the path of the corpus for that language. One reader is
     * opened per corpus (also in the input charset) and all of them are handed to the language detector
     * service. If building fails, the map file and any corpus already opened are closed again.
     * The corpus reader attributes are applied on every call.
     * @return the language corpus reader
     * @throws TalismaneException if the map path is missing, or a line lacks the tab-separated corpus path
     * @throws RuntimeException wrapping any {@link IOException} raised while opening the files
     */
    public LanguageDetectorAnnotatedCorpusReader getLanguageCorpusReader() {
        try {
            if (languageCorpusReader == null) {
                if (languageCorpusMapPath == null) {
                    throw new TalismaneException("Missing language corpus map path.");
                }
                File languageCorpusMapFile = this.getFile(languageCorpusMapPath);
                String charsetName = this.getInputCharset().name();
                Map<Locale, Reader> languageMap = new HashMap<Locale, Reader>();

                Scanner languageCorpusMapScanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(languageCorpusMapFile), charsetName)));
                boolean mapComplete = false;
                try {
                    while (languageCorpusMapScanner.hasNextLine()) {
                        String line = languageCorpusMapScanner.nextLine();
                        if (line.trim().length() == 0) {
                            // tolerate blank lines, e.g. at the end of the file
                            continue;
                        }
                        String[] parts = line.split("\t");
                        if (parts.length < 2) {
                            throw new TalismaneException("Invalid line in language corpus map "
                                    + languageCorpusMapFile.getPath()
                                    + " (expected: language tag, tab, corpus path): " + line);
                        }
                        Locale locale = Locale.forLanguageTag(parts[0]);
                        File corpusFile = this.getFile(parts[1]);
                        Reader corpusReader = new BufferedReader(
                                new InputStreamReader(new FileInputStream(corpusFile), charsetName));
                        languageMap.put(locale, corpusReader);
                    }
                    mapComplete = true;
                } finally {
                    languageCorpusMapScanner.close();
                    if (!mapComplete) {
                        // don't leave corpus files open when the map could not be read to the end
                        for (Reader openedReader : languageMap.values()) {
                            try {
                                openedReader.close();
                            } catch (IOException ignored) {
                                // best-effort cleanup: the failure that brought us here is the one to report
                            }
                        }
                    }
                }
                languageCorpusReader = this.getLanguageDetectorService().getDefaultReader(languageMap);
            }
            this.setCorpusReaderAttributes(languageCorpusReader);
            return languageCorpusReader;
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the annotated corpus reader for sentence detection.
     * <p>
     * If none has been set, a default reader is obtained from the sentence detector service
     * for the current input reader. The corpus reader attributes are applied on every call.
     * @return the sentence corpus reader
     */
    @Override
    public SentenceDetectorAnnotatedCorpusReader getSentenceCorpusReader() {
        final boolean readerMissing = this.sentenceCorpusReader == null;
        if (readerMissing) {
            this.sentenceCorpusReader = this.getSentenceDetectorService().getDefaultReader(this.getReader());
        }

        this.setCorpusReaderAttributes(this.sentenceCorpusReader);
        return this.sentenceCorpusReader;
    }

    /**
     * Sets the annotated corpus reader to use for sentence detection,
     * replacing the default one that would otherwise be created on demand.
     * @param sentenceCorpusReader the reader to store
     */
    @Override
    public void setSentenceCorpusReader(SentenceDetectorAnnotatedCorpusReader sentenceCorpusReader) {
        this.sentenceCorpusReader = sentenceCorpusReader;
    }

    /**
     * Returns the tokeniser beam width held by this configuration.
     * @return the current value of the tokeniserBeamWidth field
     */
    @Override
    public int getTokeniserBeamWidth() {
        return this.tokeniserBeamWidth;
    }

    /**
     * Returns the pos-tagger beam width held by this configuration.
     * @return the current value of the posTaggerBeamWidth field
     */
    @Override
    public int getPosTaggerBeamWidth() {
        return this.posTaggerBeamWidth;
    }

    /**
     * Returns the parser beam width held by this configuration.
     * @return the current value of the parserBeamWidth field
     */
    @Override
    public int getParserBeamWidth() {
        return this.parserBeamWidth;
    }

    /**
     * Returns the propagateTokeniserBeam setting of this configuration.
     * @return the current value of the propagateTokeniserBeam field
     */
    @Override
    public boolean isPropagateTokeniserBeam() {
        return this.propagateTokeniserBeam;
    }

    /**
     * Returns the propagateBeam setting of this configuration.
     * NOTE(review): unlike the tokeniser equivalent, this reads the general propagateBeam field -
     * presumably that field is the pos-tagger beam setting; confirm.
     * @return the current value of the propagateBeam field
     */
    @Override
    public boolean isPropagatePosTaggerBeam() {
        return this.propagateBeam;
    }

    /**
     * The minimum block size, in characters, to process by the sentence detector.
     * <p>
     * Prior to sentence detection, filters are applied to the concatenation of the previous block,
     * the current block and the next block, so that a filter crossing a block boundary is still
     * applied correctly. A filter may not match text longer than one block, since it could then stop
     * analysis without starting it again correctly, or vice versa. If really big filters are really
     * required, the block size can be increased. Default is 1000.
     * @return the block size
     */
    @Override
    public int getBlockSize() {
        return this.blockSize;
    }

    /**
     * Sets the block size held by this configuration.
     * @param blockSize the block size to store
     */
    @Override
    public void setBlockSize(int blockSize) {
        this.blockSize = blockSize;
    }

    /**
     * Returns the performance configuration file held by this configuration.
     * @return the current value of the performanceConfigFile field
     */
    @Override
    public File getPerformanceConfigFile() {
        return this.performanceConfigFile;
    }

    /**
     * Sets the performance configuration file held by this configuration.
     * @param performanceConfigFile the file to store
     */
    @Override
    public void setPerformanceConfigFile(File performanceConfigFile) {
        this.performanceConfigFile = performanceConfigFile;
    }

    /**
     * Should the parser corpus reader predict the transitions or not?
     * @return the current value of the predictTransitions field
     */
    @Override
    public boolean isPredictTransitions() {
        return this.predictTransitions;
    }

    /**
     * Sets whether the parser corpus reader should predict the transitions.
     * @param predictTransitions the value to store
     */
    @Override
    public void setPredictTransitions(boolean predictTransitions) {
        this.predictTransitions = predictTransitions;
    }

    /**
     * Returns the mode held by this configuration.
     * @return the current value of the mode field
     */
    @Override
    public Mode getMode() {
        return this.mode;
    }

    /**
     * Sets the mode held by this configuration.
     * @param mode the mode to store
     */
    @Override
    public void setMode(Mode mode) {
        this.mode = mode;
    }

    /**
     * Obtains a Talismane instance matching the current mode from the internal Talismane service:
     * a regular instance in normal mode, a server instance in server mode.
     * @return the Talismane instance for this configuration
     * @throws TalismaneException if the mode is neither normal nor server, including when no mode is set
     */
    @Override
    public Talismane getTalismane() {
        Mode currentMode = this.getMode();
        if (currentMode == Mode.normal) {
            return this.getTalismaneServiceInternal().getTalismane(this);
        }
        if (currentMode == Mode.server) {
            return this.getTalismaneServiceInternal().getTalismaneServer(this);
        }
        // a missing mode must end up here as well, not in a NullPointerException from name()
        String modeName = currentMode == null ? "null" : currentMode.name();
        throw new TalismaneException("Unknown mode: " + modeName);
    }

    /**
     * Assembles the training parameters for the configured machine learning algorithm, keyed by parameter name.
     * <ul>
     * <li>MaxEnt: iterations and cutoff</li>
     * <li>Perceptron and PerceptronRanking: iterations, cutoff, average-at-intervals, and the tolerance
     * if it is not negative</li>
     * <li>LinearSVM: cutoff, plus the solver type if set, and the constraint violation cost and epsilon
     * if they are not negative</li>
     * </ul>
     * For any other algorithm the map is empty.
     * @return a new map of training parameters
     */
    @Override
    public Map<String, Object> getTrainParameters() {
        final Map<String, Object> parameters = new HashMap<String, Object>();
        final boolean perceptronFamily = algorithm == MachineLearningAlgorithm.Perceptron
                || algorithm == MachineLearningAlgorithm.PerceptronRanking;

        if (algorithm == MachineLearningAlgorithm.MaxEnt) {
            parameters.put(MaxentModelTrainer.MaxentModelParameter.Iterations.name(), iterations);
            parameters.put(MaxentModelTrainer.MaxentModelParameter.Cutoff.name(), cutoff);
        } else if (perceptronFamily) {
            parameters.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.Iterations.name(),
                    iterations);
            parameters.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.Cutoff.name(), cutoff);
            parameters.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.AverageAtIntervals.name(),
                    averageAtIntervals);

            // optional parameter: only passed on when explicitly configured
            if (perceptronTolerance >= 0) {
                parameters.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.Tolerance.name(),
                        perceptronTolerance);
            }
        } else if (algorithm == MachineLearningAlgorithm.LinearSVM) {
            parameters.put(LinearSVMModelTrainer.LinearSVMModelParameter.Cutoff.name(), cutoff);

            // optional parameters: only passed on when explicitly configured
            if (solverType != null) {
                parameters.put(LinearSVMModelTrainer.LinearSVMModelParameter.SolverType.name(), solverType);
            }
            if (constraintViolationCost >= 0) {
                parameters.put(LinearSVMModelTrainer.LinearSVMModelParameter.ConstraintViolationCost.name(),
                        constraintViolationCost);
            }
            if (epsilon >= 0) {
                parameters.put(LinearSVMModelTrainer.LinearSVMModelParameter.Epsilon.name(), epsilon);
            }
        }

        return parameters;
    }

    /**
     * Returns the descriptors map held by this configuration, creating an empty one on first access.
     * @return the descriptors map, never null
     */
    @Override
    public Map<String, List<String>> getDescriptors() {
        if (this.descriptors != null)
            return this.descriptors;

        this.descriptors = new HashMap<String, List<String>>();
        return this.descriptors;
    }

    /**
     * Returns the machine learning algorithm held by this configuration.
     * @return the current value of the algorithm field
     */
    @Override
    public MachineLearningAlgorithm getAlgorithm() {
        return this.algorithm;
    }

    /**
     * Returns the external resource finder held by this configuration.
     * @return the current value of the externalResourceFinder field
     */
    @Override
    public ExternalResourceFinder getExternalResourceFinder() {
        return this.externalResourceFinder;
    }

    /**
     * Returns the perceptron observation points held by this configuration.
     * @return the current value of the perceptronObservationPoints field (the list itself, not a copy)
     */
    @Override
    public List<Integer> getPerceptronObservationPoints() {
        return this.perceptronObservationPoints;
    }

    /**
     * Returns the parsing constrainer, loading it on the first call from the file found
     * at the parsing constrainer path.
     * @return the cached parsing constrainer
     * @throws RuntimeException if no parsing constrainer path was provided
     */
    @Override
    public ParsingConstrainer getParsingConstrainer() {
        if (parsingConstrainer != null)
            return parsingConstrainer;

        if (parsingConstrainerPath == null)
            throw new RuntimeException("Missing argument: parsingConstrainer");

        File constrainerFile = this.getFile(parsingConstrainerPath);
        parsingConstrainer = parserService.getParsingConstrainer(constrainerFile);
        return parsingConstrainer;
    }

    /**
     * Returns the pos-tagger model file path held by this configuration.
     * @return the current value of the posTaggerModelFilePath field
     */
    @Override
    public String getPosTaggerModelFilePath() {
        return this.posTaggerModelFilePath;
    }

    /**
     * Returns the tokeniser model file path held by this configuration.
     * @return the current value of the tokeniserModelFilePath field
     */
    @Override
    public String getTokeniserModelFilePath() {
        return this.tokeniserModelFilePath;
    }

    /**
     * Returns the sentence detector model file path held by this configuration.
     * @return the current value of the sentenceModelFilePath field
     */
    @Override
    public String getSentenceModelFilePath() {
        return this.sentenceModelFilePath;
    }

    /**
     * Returns the language detector model file path held by this configuration.
     * @return the current value of the languageModelFilePath field
     */
    public String getLanguageModelFilePath() {
        return this.languageModelFilePath;
    }

    /**
     * Returns the parser model file path held by this configuration.
     * @return the current value of the parserModelFilePath field
     */
    @Override
    public String getParserModelFilePath() {
        return this.parserModelFilePath;
    }

    /**
     * Returns the pattern tokeniser type held by this configuration.
     * @return the current value of the patternTokeniserType field
     */
    @Override
    public PatternTokeniserType getPatternTokeniserType() {
        return this.patternTokeniserType;
    }

    /**
     * The port where the Talismane Server should listen.
     * @return the current value of the port field
     */
    @Override
    public int getPort() {
        return this.port;
    }

    /**
     * The first sentence index to process.
     * @return the current value of the startSentence field
     */
    @Override
    public int getStartSentence() {
        return this.startSentence;
    }

    /**
     * Loads the shared resources up front, by requesting each one once.
     * <p>
     * The merged lexicon is requested from the session if lexicon preloading is switched on. Then, for each of
     * the sentence detector, tokeniser, pos-tagger and parser that is needed for the requested processing,
     * the component is requested, in that order.
     * @throws TalismaneException if a needed component is not available
     */
    @Override
    public void preloadResources() {
        LOG.info("Loading shared resources...");

        if (preloadLexicon) {
            LOG.info("Loading lexicon");
            // requesting the lexicon is enough to get it loaded
            talismaneSession.getMergedLexicon();
        }

        // likewise, requesting each model gets it loaded
        if (this.needsSentenceDetector()) {
            LOG.info("Loading sentence detector");
            final boolean detectorMissing = this.getSentenceDetector() == null;
            if (detectorMissing)
                throw new TalismaneException("Sentence detector not provided.");
        }

        if (this.needsTokeniser()) {
            LOG.info("Loading tokeniser");
            final boolean tokeniserMissing = this.getTokeniser() == null;
            if (tokeniserMissing)
                throw new TalismaneException("Tokeniser not provided.");
        }

        if (this.needsPosTagger()) {
            LOG.info("Loading pos tagger");
            final boolean posTaggerMissing = this.getPosTagger() == null;
            if (posTaggerMissing)
                throw new TalismaneException("Pos-tagger not provided.");
        }

        if (this.needsParser()) {
            LOG.info("Loading parser");
            final boolean parserMissing = this.getParser() == null;
            if (parserMissing)
                throw new TalismaneException("Parser not provided.");
        }
    }

    /**
     * Returns the base directory held by this configuration.
     *
     * @return the value of the {@code baseDir} field, possibly {@code null}
     *         ({@code getFile} treats {@code null} as "no base directory")
     */
    public File getBaseDir() {
        return this.baseDir;
    }

    /**
     * Sets the base directory. Relative paths passed to {@code getFile} are
     * resolved against it when it is non-null.
     *
     * @param baseDir
     *            the new base directory
     */
    public void setBaseDir(File baseDir) {
        this.baseDir = baseDir;
    }

    /**
     * Turns a path string into a {@link File}.
     * <p>
     * An absolute path is used as given. A relative path is resolved against
     * {@code baseDir} when one is set, and used as given otherwise.
     *
     * @param path
     *            the path to convert
     * @return the corresponding file
     */
    private File getFile(String path) {
        File asGiven = new File(path);
        if (asGiven.isAbsolute() || baseDir == null) {
            // nothing to resolve against, or no resolution needed
            return asGiven;
        }
        return new File(baseDir, path);
    }

    /**
     * Returns the locale to use.
     *
     * @return the locale set on this configuration, or the language
     *         implementation's locale when none has been set
     */
    public Locale getLocale() {
        return locale != null ? locale : this.implementation.getLocale();
    }

    /**
     * Sets the locale. While it is {@code null}, {@code getLocale()} falls
     * back to the language implementation's locale.
     *
     * @param locale
     *            the new locale
     */
    public void setLocale(Locale locale) {
        this.locale = locale;
    }

    /**
     * Returns the language corpus map path held by this configuration.
     *
     * @return the value of the {@code languageCorpusMapPath} field
     */
    public String getLanguageCorpusMapPath() {
        return this.languageCorpusMapPath;
    }

    /**
     * Sets the language corpus map path.
     *
     * @param languageCorpusMapPath
     *            the new path
     */
    public void setLanguageCorpusMapPath(String languageCorpusMapPath) {
        this.languageCorpusMapPath = languageCorpusMapPath;
    }

    /**
     * Returns the language detector processor, creating it on first use.
     * <p>
     * When none has been set, the default processor is requested from the
     * language detector service, passing this configuration's writer, and the
     * result is stored for subsequent calls.
     *
     * @return the stored or newly created processor
     */
    @Override
    public LanguageDetectorProcessor getLanguageDetectorProcessor() {
        if (this.languageDetectorProcessor != null) {
            return this.languageDetectorProcessor;
        }

        this.languageDetectorProcessor = this.getLanguageDetectorService()
                .getDefaultLanguageDetectorProcessor(this.getWriter());
        return this.languageDetectorProcessor;
    }

    /**
     * Sets the language detector processor. A non-null value is returned as is
     * by {@code getLanguageDetectorProcessor()} instead of the default one.
     *
     * @param languageDetectorProcessor
     *            the processor to use
     */
    public void setLanguageDetectorProcessor(LanguageDetectorProcessor languageDetectorProcessor) {
        this.languageDetectorProcessor = languageDetectorProcessor;
    }

    /**
     * Returns the language implementation held by this configuration.
     *
     * @return the value of the {@code implementation} field
     */
    @Override
    public LanguageImplementation getLanguageImplementation() {
        return this.implementation;
    }

    /**
     * Adds a token filter at the end.
     * <p>
     * The filter is always recorded in {@code additionalTokenFilters}. If the
     * {@code tokenFilters} list is non-null, the filter is appended to it as
     * well.
     *
     * @param tokenFilter
     *            the filter to append
     */
    @Override
    public void addTokenFilter(TokenFilter tokenFilter) {
        additionalTokenFilters.add(tokenFilter);
        if (tokenFilters == null) {
            return;
        }
        tokenFilters.add(tokenFilter);
    }

    /**
     * Adds a token filter at the start.
     * <p>
     * The filter is always inserted at index 0 of
     * {@code prependedTokenFilters}. If the {@code tokenFilters} list is
     * non-null, the filter is inserted at index 0 of that list as well.
     *
     * @param tokenFilter
     *            the filter to prepend
     */
    @Override
    public void prependTokenFilter(TokenFilter tokenFilter) {
        prependedTokenFilters.add(0, tokenFilter);
        if (tokenFilters == null) {
            return;
        }
        tokenFilters.add(0, tokenFilter);
    }

}