com.joliciel.talismane.TalismaneConfig.java Source code

Java tutorial

Introduction

Here is the source code for com.joliciel.talismane.TalismaneConfig.java

Source

///////////////////////////////////////////////////////////////////////////////
//Copyright (C) 2012 Assaf Urieli
//
//This file is part of Talismane.
//
//Talismane is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//Talismane is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU Affero General Public License for more details.
//
//You should have received a copy of the GNU Affero General Public License
//along with Talismane.  If not, see <http://www.gnu.org/licenses/>.
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.talismane;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.Map.Entry;
import java.util.Scanner;
import java.util.zip.ZipInputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.log4j.PropertyConfigurator;

import com.joliciel.talismane.Talismane.Command;
import com.joliciel.talismane.Talismane.Mode;
import com.joliciel.talismane.Talismane.Module;
import com.joliciel.talismane.Talismane.Option;
import com.joliciel.talismane.filters.FilterService;
import com.joliciel.talismane.filters.MarkerFilterType;
import com.joliciel.talismane.filters.TextMarkerFilter;
import com.joliciel.talismane.lexicon.LexiconChain;
import com.joliciel.talismane.lexicon.LexiconDeserializer;
import com.joliciel.talismane.lexicon.PosTaggerLexicon;
import com.joliciel.talismane.machineLearning.ClassificationEventStream;
import com.joliciel.talismane.machineLearning.ClassificationObserver;
import com.joliciel.talismane.machineLearning.ClassificationModel;
import com.joliciel.talismane.machineLearning.ExternalResource;
import com.joliciel.talismane.machineLearning.ExternalResourceFinder;
import com.joliciel.talismane.machineLearning.MachineLearningAlgorithm;
import com.joliciel.talismane.machineLearning.MachineLearningModel;
import com.joliciel.talismane.machineLearning.MachineLearningService;
import com.joliciel.talismane.machineLearning.MachineLearningSession;
import com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModelTrainer;
import com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModelTrainer.LinearSVMSolverType;
import com.joliciel.talismane.machineLearning.maxent.MaxentModelTrainer;
import com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModelTrainer;
import com.joliciel.talismane.machineLearning.perceptron.PerceptronService.PerceptronScoring;
import com.joliciel.talismane.output.FreemarkerTemplateWriter;
import com.joliciel.talismane.parser.ParseComparator;
import com.joliciel.talismane.parser.ParseComparisonStrategy;
import com.joliciel.talismane.parser.ParseConfigurationProcessor;
import com.joliciel.talismane.parser.ParseEvaluationFScoreCalculator;
import com.joliciel.talismane.parser.ParseEvaluationGuessTemplateWriter;
import com.joliciel.talismane.parser.ParseEvaluationSentenceWriter;
import com.joliciel.talismane.parser.Parser;
import com.joliciel.talismane.parser.Parser.ParseComparisonStrategyType;
import com.joliciel.talismane.parser.ParserAnnotatedCorpusReader;
import com.joliciel.talismane.parser.ParserEvaluator;
import com.joliciel.talismane.parser.ParserFScoreCalculatorByDistance;
import com.joliciel.talismane.parser.ParserRegexBasedCorpusReader;
import com.joliciel.talismane.parser.ParserService;
import com.joliciel.talismane.parser.ParsingConstrainer;
import com.joliciel.talismane.parser.Transition;
import com.joliciel.talismane.parser.TransitionBasedParser;
import com.joliciel.talismane.parser.TransitionSystem;
import com.joliciel.talismane.parser.features.ParseConfigurationFeature;
import com.joliciel.talismane.parser.features.ParserFeatureService;
import com.joliciel.talismane.parser.features.ParserRule;
import com.joliciel.talismane.posTagger.NonDeterministicPosTagger;
import com.joliciel.talismane.posTagger.PosTag;
import com.joliciel.talismane.posTagger.PosTagAnnotatedCorpusReader;
import com.joliciel.talismane.posTagger.PosTagComparator;
import com.joliciel.talismane.posTagger.PosTagEvaluationFScoreCalculator;
import com.joliciel.talismane.posTagger.PosTagEvaluationLexicalCoverageTester;
import com.joliciel.talismane.posTagger.PosTagEvaluationSentenceWriter;
import com.joliciel.talismane.posTagger.PosTagRegexBasedCorpusReader;
import com.joliciel.talismane.posTagger.PosTagSequenceProcessor;
import com.joliciel.talismane.posTagger.PosTagSet;
import com.joliciel.talismane.posTagger.PosTagger;
import com.joliciel.talismane.posTagger.PosTaggerEvaluator;
import com.joliciel.talismane.posTagger.PosTaggerGuessTemplateWriter;
import com.joliciel.talismane.posTagger.PosTaggerService;
import com.joliciel.talismane.posTagger.features.PosTaggerFeature;
import com.joliciel.talismane.posTagger.features.PosTaggerFeatureService;
import com.joliciel.talismane.posTagger.features.PosTaggerRule;
import com.joliciel.talismane.posTagger.filters.PosTagFilterService;
import com.joliciel.talismane.posTagger.filters.PosTagSequenceFilter;
import com.joliciel.talismane.sentenceDetector.SentenceDetector;
import com.joliciel.talismane.sentenceDetector.SentenceDetectorAnnotatedCorpusReader;
import com.joliciel.talismane.sentenceDetector.SentenceDetectorOutcome;
import com.joliciel.talismane.sentenceDetector.SentenceDetectorService;
import com.joliciel.talismane.sentenceDetector.SentenceProcessor;
import com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature;
import com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeatureService;
import com.joliciel.talismane.tokeniser.TokenComparator;
import com.joliciel.talismane.tokeniser.TokenEvaluationCorpusWriter;
import com.joliciel.talismane.tokeniser.TokenEvaluationFScoreCalculator;
import com.joliciel.talismane.tokeniser.TokenEvaluationObserver;
import com.joliciel.talismane.tokeniser.TokenRegexBasedCorpusReader;
import com.joliciel.talismane.tokeniser.TokenSequenceProcessor;
import com.joliciel.talismane.tokeniser.Tokeniser;
import com.joliciel.talismane.tokeniser.TokeniserAnnotatedCorpusReader;
import com.joliciel.talismane.tokeniser.TokeniserEvaluator;
import com.joliciel.talismane.tokeniser.TokeniserGuessTemplateWriter;
import com.joliciel.talismane.tokeniser.TokeniserOutcome;
import com.joliciel.talismane.tokeniser.TokeniserService;
import com.joliciel.talismane.tokeniser.features.TokenFeatureService;
import com.joliciel.talismane.tokeniser.features.TokenPatternMatchFeature;
import com.joliciel.talismane.tokeniser.features.TokeniserContextFeature;
import com.joliciel.talismane.tokeniser.filters.TokenFilter;
import com.joliciel.talismane.tokeniser.filters.TokenFilterService;
import com.joliciel.talismane.tokeniser.filters.TokenSequenceFilter;
import com.joliciel.talismane.tokeniser.patterns.PatternTokeniser;
import com.joliciel.talismane.tokeniser.patterns.TokeniserPatternManager;
import com.joliciel.talismane.tokeniser.patterns.TokeniserPatternService;
import com.joliciel.talismane.tokeniser.patterns.TokeniserPatternService.PatternTokeniserType;
import com.joliciel.talismane.utils.LogUtils;

/**
 * Loads, stores and translates configuration information to be passed to Talismane when processing.<br>
 * The {@link LanguageSpecificImplementation} passed to the constructor must include language-specific implementation resources.<br>
 * The processing must go from a given start module to a given end module in sequence, where the modules available are:
 * Sentence detector, Tokeniser, Pos tagger, Parser.<br>
 * There is a default input format for each start module,
 * which can be overridden by providing a regex for processing lines of input. The default format is:<br>
 * <ul>
 * <li>Sentence detector: newlines indicate sentence breaks.</li>
 * <li>Tokeniser: expect exactly one token per line.</li>
 * <li>Pos tagger: {@link  com.joliciel.talismane.tokeniser.TokenRegexBasedCorpusReader#DEFAULT_REGEX default regex} </li>
 * <li>Parser: {@link  com.joliciel.talismane.posTagger.PosTagRegexBasedCorpusReader#DEFAULT_REGEX default regex} </li>
 * </ul>
 * @author Assaf Urieli
 *
 */
public class TalismaneConfig {
    // Class-wide logger.
    private static final Log LOG = LogFactory.getLog(TalismaneConfig.class);
    // Values of the "command", "option" and "mode" arguments; mode defaults to Mode.normal.
    private Command command = null;
    private Option option = null;
    private Mode mode = Mode.normal;

    // Values of the "startModule", "endModule" and "module" arguments.
    // loadParameters falls back to module when startModule or endModule is not supplied.
    private Module startModule = null;
    private Module endModule = null;
    private Module module = null;

    // One processing component per module (sentence detector, tokeniser, pos-tagger, parser).
    private SentenceDetector sentenceDetector;
    private Tokeniser tokeniser;
    private PosTagger posTagger;
    private Parser parser;

    // Evaluators and comparators for the tokeniser, pos-tagger and parser.
    private ParserEvaluator parserEvaluator;
    private PosTaggerEvaluator posTaggerEvaluator;
    private TokeniserEvaluator tokeniserEvaluator;
    private ParseComparator parseComparator;
    private PosTagComparator posTagComparator;
    private TokenComparator tokenComparator;

    // Annotated corpus readers, with separate instances for the evaluation corpus.
    private TokeniserAnnotatedCorpusReader tokenCorpusReader = null;
    private PosTagAnnotatedCorpusReader posTagCorpusReader = null;
    private ParserAnnotatedCorpusReader parserCorpusReader = null;
    private ParserAnnotatedCorpusReader parserEvaluationCorpusReader = null;
    private PosTagAnnotatedCorpusReader posTagEvaluationCorpusReader = null;
    private TokeniserAnnotatedCorpusReader tokenEvaluationCorpusReader = null;
    private SentenceDetectorAnnotatedCorpusReader sentenceCorpusReader = null;

    // Output processors, one per module.
    private SentenceProcessor sentenceProcessor;
    private TokenSequenceProcessor tokenSequenceProcessor;
    private PosTagSequenceProcessor posTagSequenceProcessor;
    private ParseConfigurationProcessor parseConfigurationProcessor;

    // Machine-learning models for the tokeniser, pos-tagger and parser.
    private ClassificationModel<TokeniserOutcome> tokeniserModel = null;
    private ClassificationModel<PosTag> posTaggerModel = null;
    private MachineLearningModel parserModel = null;

    // General processing options. Each is set from the argument of the same name,
    // except maxSentenceCount, which is set from "sentenceCount".
    private boolean processByDefault = true;
    private int maxSentenceCount = 0;
    private int beamWidth = 1;
    private boolean propagateBeam = true;
    private boolean includeDetails = false;
    // NOTE(review): presumably derived from the "encoding" / "inputEncoding" / "outputEncoding"
    // arguments; the conversion to Charset is not visible in this section - confirm.
    private Charset inputCharset = null;
    private Charset outputCharset = null;

    // Per-module beam settings, set from the arguments of the same name.
    private int tokeniserBeamWidth = 1;
    private int posTaggerBeamWidth = -1;
    private int parserBeamWidth = -1;
    private boolean propagateTokeniserBeam = false;

    // Set from "endBlockCharCode" (an integer character code); defaults to form feed.
    private char endBlockCharacter = '\f';
    // Set from "inputPattern" and "inputPatternFile" respectively.
    private String inputRegex;
    private String inputPatternFilePath = null;
    // Set from "evaluationPattern" and "evaluationPatternFile" respectively.
    private String evaluationRegex;
    private String evaluationPatternFilePath = null;
    // Set from the arguments of the same name.
    private int maxParseAnalysisTime = 60;
    private int minFreeMemory = 64;
    private boolean earlyStop = false;

    // Input, output and evaluation streams.
    private Reader reader = null;
    private Writer writer = null;
    private Reader evaluationReader = null;

    // Paths supplied on the command line. The argument name is the field name without
    // the "Path"/"FilePath" suffix unless noted otherwise.
    private String inFilePath = null;
    private String outFilePath = null;
    private String outDirPath = null;
    private String parserModelFilePath = null;
    private String posTaggerModelFilePath = null;
    private String tokeniserModelFilePath = null;
    // Set from "sentenceModel".
    private String sentenceModelFilePath = null;
    private String textFiltersPath = null;
    private String tokenFiltersPath = null;
    // Set from "tokenSequenceFilters".
    private String tokenSequenceFilterPath = null;
    // Set from "posTagSequenceFilters".
    private String posTagSequenceFilterPath = null;
    private String templatePath = null;
    private String evaluationFilePath = null;
    private String sentenceReaderPath = null;
    // Set from "posTaggerRules" (after stripping any "replace:" prefix).
    private String posTaggerRuleFilePath = null;
    // Set from "posTaggerFeatures".
    private String posTaggerFeaturePath = null;
    // Set from "tokeniserFeatures".
    private String tokeniserFeaturePath = null;
    // Set from "tokeniserPatterns".
    private String tokeniserPatternFilePath = null;
    // Set from "sentenceFeatures".
    private String sentenceFeaturePath = null;

    // Set from "lexiconDir"; a "replace:" prefix on the value is stripped and sets replaceLexicon.
    private String lexiconDirPath = null;
    private boolean replaceLexicon = false;

    // Default template file names, one per module.
    private String sentenceTemplateName = "sentence_template.ftl";
    private String tokeniserTemplateName = "tokeniser_template.ftl";
    private String posTaggerTemplateName = "posTagger_template.ftl";
    private String parserTemplateName = "parser_conll_template.ftl";

    // Output and evaluation options. fileName, logStats, suffix, outputGuesses, outputGuessCount,
    // labeledEvaluation, dynamiseFeatures and skipLabel are set from the arguments of the same name.
    private String fileName = null;
    private boolean logStats = false;
    private File outDir = null;
    private String baseName = null;
    private String suffix = "";
    private boolean outputGuesses = false;
    private int outputGuessCount = 0;
    private boolean labeledEvaluation = true;
    private boolean dynamiseFeatures = false;
    private String skipLabel = null;

    // Rules and filters.
    private List<PosTaggerRule> posTaggerRules = null;
    private List<ParserRule> parserRules = null;
    // Set from "parserRules" (after stripping any "replace:" prefix).
    private String parserRuleFilePath = null;
    // Set from "parserFeatures".
    private String parserFeaturePath = null;
    private List<TextMarkerFilter> textMarkerFilters = null;
    private List<TokenFilter> tokenFilters = null;
    private List<TokenSequenceFilter> tokenSequenceFilters = null;
    private List<PosTagSequenceFilter> posTaggerPostProcessingFilters = null;
    // Set from the arguments of the same name.
    private boolean includeDistanceFScores = false;
    private boolean predictTransitions = false;
    // True when the "posTaggerRules" / "parserRules" value started with "replace:".
    private boolean posTaggerRulesReplace = false;
    private boolean parserRulesReplace = false;

    // Set from "newline" (a MarkerFilterType name) and "blockSize".
    private MarkerFilterType newlineMarker = MarkerFilterType.SENTENCE_BREAK;
    private int blockSize = 1000;

    // Set from the arguments of the same name; all default to -1.
    private int crossValidationSize = -1;
    private int includeIndex = -1;
    private int excludeIndex = -1;

    // Set from "testWords", a semicolon-separated list.
    private Set<String> testWords = null;
    // Feature sets for each module.
    private Set<SentenceDetectorFeature<?>> sentenceFeatures;
    private Set<TokeniserContextFeature<?>> tokeniserContextFeatures;
    private Set<TokenPatternMatchFeature<?>> tokenPatternMatchFeatures;
    private Set<PosTaggerFeature<?>> posTaggerFeatures;
    private Set<ParseConfigurationFeature<?>> parserFeatures;
    private TokeniserPatternManager tokeniserPatternManager;
    private ClassificationEventStream classificationEventStream;
    // Set from "patternTokeniser"; defaults to Compound.
    private PatternTokeniserType patternTokeniserType = PatternTokeniserType.Compound;

    // NOTE(review): presumably record whether filters/decoration have already been applied
    // to the corresponding corpus reader; their use is not visible in this section - confirm.
    private boolean parserCorpusReaderFiltersAdded = false;
    private boolean posTagCorpusReaderFiltersAdded = false;
    private boolean tokenCorpusReaderFiltersAdded = false;
    private boolean parserCorpusReaderDecorated = false;

    // Service locator (assigned in the constructors) and the service references.
    private TalismaneServiceLocator talismaneServiceLocator = null;
    private TalismaneServiceInternal talismaneService;
    private PosTaggerService posTaggerService;
    private ParserService parserService;
    private PosTaggerFeatureService posTaggerFeatureService;
    private ParserFeatureService parserFeatureService;
    private FilterService filterService;
    private TokenFilterService tokenFilterService;
    private SentenceDetectorService sentenceDetectorService;
    private SentenceDetectorFeatureService sentenceDetectorFeatureService;
    private MachineLearningService machineLearningService;
    private TokeniserPatternService tokeniserPatternService;
    private TokenFeatureService tokenFeatureService;
    private TokeniserService tokeniserService;
    private PosTagFilterService posTagFilterService;

    // Created from the "performanceConfigFile" argument when it is present.
    private File performanceConfigFile;
    // Set from "parseComparisonStrategy".
    private ParseComparisonStrategyType parseComparisonStrategyType;
    // Set from "includeLexiconCoverage".
    private boolean includeLexiconCoverage = false;

    // training parameters (package-private)
    // Set from "iterations" and "cutoff".
    int iterations = 0;
    int cutoff = 0;
    // Set from "algorithm" (a MachineLearningAlgorithm name); defaults to MaxEnt.
    MachineLearningAlgorithm algorithm = MachineLearningAlgorithm.MaxEnt;
    // Set from "linearSVMCost", "linearSVMEpsilon" and "linearSVMSolver" respectively.
    double constraintViolationCost = -1;
    double epsilon = -1;
    LinearSVMSolverType solverType = null;
    // Set from the arguments of the same name; the observation points are given
    // as a comma-separated list of integers.
    double perceptronTolerance = -1;
    boolean averageAtIntervals = false;
    List<Integer> perceptronObservationPoints = null;
    // Set from "dependencyLabels" and "excludeFile" respectively.
    String dependencyLabelPath = null;
    String excludeFileName = null;

    ExternalResourceFinder externalResourceFinder = null;
    Map<String, List<String>> descriptors = null;
    // Set from "parsingConstrainer".
    String parsingConstrainerPath = null;
    ParsingConstrainer parsingConstrainer = null;
    // The language-specific implementation passed to the constructor.
    LanguageSpecificImplementation implementation;

    /**
     * Creates a configuration from raw command-line arguments.
     * The arguments are first turned into a name/value map by
     * {@link #convertArgs(String[])} and then loaded.
     *
     * @param args command-line arguments, each of the form {@code name=value}
     * @param implementation the language-specific implementation, which is also
     *        registered with {@link TalismaneSession}
     * @throws Exception if the parameters cannot be loaded
     */
    public TalismaneConfig(String[] args, LanguageSpecificImplementation implementation) throws Exception {
        // The session is given the implementation before any parameter is read.
        TalismaneSession.setImplementation(implementation);
        this.implementation = implementation;
        this.talismaneServiceLocator = TalismaneServiceLocator.getInstance();

        loadParameters(convertArgs(args));
    }

    /**
     * Creates a configuration from arguments that have already been split
     * into a name/value map.
     *
     * @param args argument values keyed by argument name
     * @param implementation the language-specific implementation, which is also
     *        registered with {@link TalismaneSession}
     * @throws Exception if the parameters cannot be loaded
     */
    public TalismaneConfig(Map<String, String> args, LanguageSpecificImplementation implementation)
            throws Exception {
        // The session is given the implementation before any parameter is read.
        TalismaneSession.setImplementation(implementation);
        this.implementation = implementation;
        this.talismaneServiceLocator = TalismaneServiceLocator.getInstance();

        loadParameters(args);
    }

    /**
     * Converts command-line arguments of the form {@code name=value} into a map
     * from argument name to argument value. Only the first {@code =} in an
     * argument separates the name from the value, so values may themselves
     * contain {@code =}.
     *
     * @param args the raw command-line arguments
     * @return a map of argument names to argument values
     * @throws TalismaneException if an argument contains no {@code =},
     *         or if the same argument name appears more than once
     */
    public static Map<String, String> convertArgs(String[] args) {
        Map<String, String> argMap = new HashMap<String, String>();
        for (String arg : args) {
            int equalsPos = arg.indexOf('=');
            // Without this check, substring(0, -1) would fail with an
            // uninformative StringIndexOutOfBoundsException.
            if (equalsPos < 0)
                throw new TalismaneException(
                        "Invalid command-line argument, expected name=value: " + arg);
            String argName = arg.substring(0, equalsPos);
            String argValue = arg.substring(equalsPos + 1);
            if (argMap.containsKey(argName))
                throw new TalismaneException("Duplicate command-line argument: " + argName);
            argMap.put(argName, argValue);
        }
        return argMap;
    }

    void loadParameters(Map<String, String> args) throws Exception {
        if (args.size() == 0) {
            System.out.println("Talismane usage instructions: ");
            System.out.println("* indicates optional, + indicates default value");
            System.out.println("");
            System.out.println(
                    "Usage: command=analyse *startModule=[sentence+|tokenise|postag|parse] *endModule=[sentence|tokenise|postag|parse+] *inFile=[inFilePath, stdin if missing] *outFile=[outFilePath, stdout if missing] *template=[outputTemplatePath]");
            System.out.println("");
            System.out.println("Additional optional parameters:");
            System.out.println(
                    " *encoding=[UTF-8, ...] *includeDetails=[true|false+] posTaggerRules*=[posTaggerRuleFilePath] textFilters*=[regexFilterFilePath] *sentenceModel=[path] *tokeniserModel=[path] *posTaggerModel=[path] *parserModel=[path] *inputPatternFile=[inputPatternFilePath] *posTagSet=[posTagSetPath]");
            return;
        }

        String logConfigPath = args.get("logConfigFile");
        if (logConfigPath != null) {
            args.remove("logConfigFile");
            Properties props = new Properties();
            props.load(new FileInputStream(logConfigPath));
            PropertyConfigurator.configure(props);
        }

        String performanceConifPath = args.get("performanceConfigFile");
        if (performanceConifPath != null) {
            args.remove("performanceConfigFile");
            performanceConfigFile = new File(performanceConifPath);
        }

        String encoding = null;
        String inputEncoding = null;
        String outputEncoding = null;
        String builtInTemplate = null;

        String posTagSetPath = null;
        String externalResourcePath = null;
        String transitionSystemStr = null;

        for (Entry<String, String> arg : args.entrySet()) {
            String argName = arg.getKey();
            String argValue = arg.getValue();
            if (argName.equals("command")) {
                String commandString = argValue;
                if (commandString.equals("analyze"))
                    commandString = "analyse";

                command = Command.valueOf(commandString);
            } else if (argName.equals("option")) {
                option = Option.valueOf(argValue);
            } else if (argName.equals("mode")) {
                mode = Mode.valueOf(argValue);
            } else if (argName.equals("module")) {
                if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                    module = Talismane.Module.SentenceDetector;
                else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                    module = Talismane.Module.Tokeniser;
                else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                    module = Talismane.Module.PosTagger;
                else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                    module = Talismane.Module.Parser;
                else
                    throw new TalismaneException("Unknown module: " + argValue);
            } else if (argName.equals("startModule")) {
                if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                    startModule = Talismane.Module.SentenceDetector;
                else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                    startModule = Talismane.Module.Tokeniser;
                else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                    startModule = Talismane.Module.PosTagger;
                else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                    startModule = Talismane.Module.Parser;
                else
                    throw new TalismaneException("Unknown startModule: " + argValue);
            } else if (argName.equals("endModule")) {
                if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                    endModule = Talismane.Module.SentenceDetector;
                else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                    endModule = Talismane.Module.Tokeniser;
                else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                    endModule = Talismane.Module.PosTagger;
                else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                    endModule = Talismane.Module.Parser;
                else
                    throw new TalismaneException("Unknown endModule: " + argValue);
            } else if (argName.equals("inFile"))
                inFilePath = argValue;
            else if (argName.equals("outFile"))
                outFilePath = argValue;
            else if (argName.equals("outDir"))
                outDirPath = argValue;
            else if (argName.equals("template"))
                templatePath = argValue;
            else if (argName.equals("builtInTemplate"))
                builtInTemplate = argValue;
            else if (argName.equals("encoding")) {
                if (inputEncoding != null || outputEncoding != null)
                    throw new TalismaneException(
                            "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                encoding = argValue;
            } else if (argName.equals("inputEncoding")) {
                if (encoding != null)
                    throw new TalismaneException(
                            "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                inputEncoding = argValue;
            } else if (argName.equals("outputEncoding")) {
                if (encoding != null)
                    throw new TalismaneException(
                            "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                outputEncoding = argValue;
            } else if (argName.equals("includeDetails"))
                includeDetails = argValue.equalsIgnoreCase("true");
            else if (argName.equals("propagateBeam"))
                propagateBeam = argValue.equalsIgnoreCase("true");
            else if (argName.equals("beamWidth"))
                beamWidth = Integer.parseInt(argValue);
            else if (argName.equals("sentenceModel"))
                sentenceModelFilePath = argValue;
            else if (argName.equals("tokeniserModel"))
                tokeniserModelFilePath = argValue;
            else if (argName.equals("posTaggerModel"))
                posTaggerModelFilePath = argValue;
            else if (argName.equals("parserModel"))
                parserModelFilePath = argValue;
            else if (argName.equals("inputPatternFile"))
                inputPatternFilePath = argValue;
            else if (argName.equals("inputPattern"))
                inputRegex = argValue;
            else if (argName.equals("evaluationPatternFile"))
                evaluationPatternFilePath = argValue;
            else if (argName.equals("evaluationPattern"))
                evaluationRegex = argValue;
            else if (argName.equals("posTaggerRules")) {
                if (argValue.startsWith("replace:")) {
                    posTaggerRulesReplace = true;
                    posTaggerRuleFilePath = argValue.substring("replace:".length());
                } else {
                    posTaggerRuleFilePath = argValue;
                }
            } else if (argName.equals("parserRules")) {
                if (argValue.startsWith("replace:")) {
                    parserRulesReplace = true;
                    parserRuleFilePath = argValue.substring("replace:".length());
                } else {
                    parserRuleFilePath = argValue;
                }
            } else if (argName.equals("posTagSet"))
                posTagSetPath = argValue;
            else if (argName.equals("textFilters"))
                textFiltersPath = argValue;
            else if (argName.equals("tokenFilters"))
                tokenFiltersPath = argValue;
            else if (argName.equals("tokenSequenceFilters"))
                tokenSequenceFilterPath = argValue;
            else if (argName.equals("posTagSequenceFilters"))
                posTagSequenceFilterPath = argValue;
            else if (argName.equals("logStats"))
                logStats = argValue.equalsIgnoreCase("true");
            else if (argName.equals("newline"))
                newlineMarker = MarkerFilterType.valueOf(argValue);
            else if (argName.equals("fileName"))
                fileName = argValue;
            else if (argName.equals("processByDefault"))
                processByDefault = argValue.equalsIgnoreCase("true");
            else if (argName.equals("maxParseAnalysisTime"))
                maxParseAnalysisTime = Integer.parseInt(argValue);
            else if (argName.equals("minFreeMemory"))
                minFreeMemory = Integer.parseInt(argValue);
            else if (argName.equals("transitionSystem"))
                transitionSystemStr = argValue;
            else if (argName.equals("sentenceCount"))
                maxSentenceCount = Integer.parseInt(argValue);
            else if (argName.equals("endBlockCharCode"))
                endBlockCharacter = (char) Integer.parseInt(argValue);
            else if (argName.equals("outputGuesses"))
                outputGuesses = argValue.equalsIgnoreCase("true");
            else if (argName.equals("outputGuessCount"))
                outputGuessCount = Integer.parseInt(argValue);
            else if (argName.equals("suffix"))
                suffix = argValue;
            else if (argName.equals("includeDistanceFScores"))
                includeDistanceFScores = argValue.equalsIgnoreCase("true");
            else if (argName.equals("evaluationFile"))
                evaluationFilePath = argValue;
            else if (argName.equals("labeledEvaluation"))
                labeledEvaluation = argValue.equalsIgnoreCase("true");
            else if (argName.equals("tokeniserBeamWidth"))
                tokeniserBeamWidth = Integer.parseInt(argValue);
            else if (argName.equals("posTaggerBeamWidth"))
                posTaggerBeamWidth = Integer.parseInt(argValue);
            else if (argName.equals("parserBeamWidth"))
                parserBeamWidth = Integer.parseInt(argValue);
            else if (argName.equals("propagateTokeniserBeam"))
                propagateTokeniserBeam = argValue.equalsIgnoreCase("true");
            else if (argName.equals("blockSize"))
                blockSize = Integer.parseInt(argValue);
            else if (argName.equals("crossValidationSize"))
                crossValidationSize = Integer.parseInt(argValue);
            else if (argName.equals("includeIndex"))
                includeIndex = Integer.parseInt(argValue);
            else if (argName.equals("excludeIndex"))
                excludeIndex = Integer.parseInt(argValue);
            else if (argName.equals("dynamiseFeatures"))
                dynamiseFeatures = argValue.equalsIgnoreCase("true");
            else if (argName.equals("predictTransitions"))
                predictTransitions = argValue.equalsIgnoreCase("true");
            else if (argName.equals("lexiconDir")) {
                if (argValue.startsWith("replace:")) {
                    replaceLexicon = true;
                    lexiconDirPath = argValue.substring("replace:".length());
                } else {
                    lexiconDirPath = argValue;
                }
            } else if (argName.equals("perceptronScoring")) {
                PerceptronScoring perceptronScoring = PerceptronScoring.valueOf(argValue);
                MachineLearningSession.setPerceptronScoring(perceptronScoring);
            } else if (argName.equals("parseComparisonStrategy")) {
                parseComparisonStrategyType = ParseComparisonStrategyType.valueOf(argValue);
            } else if (argName.equals("sentenceReader")) {
                sentenceReaderPath = argValue;
            } else if (argName.equals("skipLabel")) {
                skipLabel = argValue;
            } else if (argName.equals("earlyStop")) {
                earlyStop = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("sentenceFeatures")) {
                sentenceFeaturePath = argValue;
            } else if (argName.equals("tokeniserFeatures")) {
                tokeniserFeaturePath = argValue;
            } else if (argName.equals("tokeniserPatterns")) {
                tokeniserPatternFilePath = argValue;
            } else if (argName.equals("posTaggerFeatures")) {
                posTaggerFeaturePath = argValue;
            } else if (argName.equals("parserFeatures")) {
                parserFeaturePath = argValue;
            } else if (argName.equals("externalResources")) {
                externalResourcePath = argValue;
            } else if (argName.equals("testWords")) {
                String[] parts = argValue.split(";");
                testWords = new HashSet<String>();
                for (String part : parts)
                    testWords.add(part);
            } else if (argName.equals("includeLexiconCoverage")) {
                includeLexiconCoverage = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("iterations"))
                iterations = Integer.parseInt(argValue);
            else if (argName.equals("cutoff"))
                cutoff = Integer.parseInt(argValue);
            else if (argName.equals("dependencyLabels"))
                dependencyLabelPath = argValue;
            else if (argName.equals("parsingConstrainer"))
                parsingConstrainerPath = argValue;
            else if (argName.equals("algorithm"))
                algorithm = MachineLearningAlgorithm.valueOf(argValue);
            else if (argName.equals("linearSVMSolver"))
                solverType = LinearSVMSolverType.valueOf(argValue);
            else if (argName.equals("linearSVMCost"))
                constraintViolationCost = Double.parseDouble(argValue);
            else if (argName.equals("linearSVMEpsilon"))
                epsilon = Double.parseDouble(argValue);
            else if (argName.equals("perceptronTolerance"))
                perceptronTolerance = Double.parseDouble(argValue);
            else if (argName.equals("averageAtIntervals"))
                averageAtIntervals = argValue.equalsIgnoreCase("true");
            else if (argName.equals("perceptronObservationPoints")) {
                String[] points = argValue.split(",");
                perceptronObservationPoints = new ArrayList<Integer>();
                for (String point : points)
                    perceptronObservationPoints.add(Integer.parseInt(point));
            } else if (argName.equals("patternTokeniser"))
                patternTokeniserType = PatternTokeniserType.valueOf(argValue);
            else if (argName.equals("excludeFile")) {
                excludeFileName = argValue;
            } else {
                System.out.println("Unknown argument: " + argName);
                throw new RuntimeException("Unknown argument: " + argName);
            }
        }

        if (command == null)
            throw new TalismaneException("No command provided.");

        if (command.equals(Command.evaluate)) {
            if (outDirPath.length() == 0)
                throw new RuntimeException("Missing argument: outdir");
        }

        if (startModule == null)
            startModule = module;
        if (startModule == null)
            startModule = Module.SentenceDetector;
        if (endModule == null)
            endModule = module;
        if (endModule == null)
            endModule = Module.Parser;
        if (module == null)
            module = endModule;

        if (command == Command.train) {
            this.predictTransitions = true;
        }

        if (builtInTemplate != null) {
            if (builtInTemplate.equalsIgnoreCase("with_location")) {
                tokeniserTemplateName = "tokeniser_template_with_location.ftl";
                posTaggerTemplateName = "posTagger_template_with_location.ftl";
                parserTemplateName = "parser_conll_template_with_location.ftl";
            } else if (builtInTemplate.equalsIgnoreCase("with_prob")) {
                tokeniserTemplateName = "tokeniser_template_with_prob.ftl";
                posTaggerTemplateName = "posTagger_template_with_prob.ftl";
                parserTemplateName = "parser_conll_template_with_prob.ftl";
            } else if (builtInTemplate.equalsIgnoreCase("with_comments")) {
                posTaggerTemplateName = "posTagger_template_with_comments.ftl";
                parserTemplateName = "parser_conll_template_with_comments.ftl";
            } else {
                throw new TalismaneException("Unknown builtInTemplate: " + builtInTemplate);
            }
        }

        if (posTaggerBeamWidth < 0)
            posTaggerBeamWidth = beamWidth;
        if (parserBeamWidth < 0)
            parserBeamWidth = beamWidth;

        inputCharset = Charset.defaultCharset();
        outputCharset = Charset.defaultCharset();
        if (encoding != null) {
            inputCharset = Charset.forName(encoding);
            outputCharset = Charset.forName(encoding);
        } else {
            if (inputEncoding != null)
                inputCharset = Charset.forName(inputEncoding);
            if (outputEncoding != null)
                outputCharset = Charset.forName(outputEncoding);
        }

        if (fileName == null && inFilePath != null) {
            fileName = inFilePath;
        }

        if (posTagSetPath != null) {
            File posTagSetFile = new File(posTagSetPath);
            Scanner posTagSetScanner = new Scanner(new BufferedReader(
                    new InputStreamReader(new FileInputStream(posTagSetFile), this.getInputCharset().name())));

            PosTagSet posTagSet = this.getPosTaggerService().getPosTagSet(posTagSetScanner);
            TalismaneSession.setPosTagSet(posTagSet);
        }

        if (transitionSystemStr != null) {
            TransitionSystem transitionSystem = null;
            if (transitionSystemStr.equalsIgnoreCase("ShiftReduce")) {
                transitionSystem = this.getParserService().getShiftReduceTransitionSystem();
            } else if (transitionSystemStr.equalsIgnoreCase("ArcEager")) {
                transitionSystem = this.getParserService().getArcEagerTransitionSystem();
            } else {
                throw new TalismaneException("Unknown transition system: " + transitionSystemStr);
            }

            if (dependencyLabelPath != null) {
                File dependencyLabelFile = new File(dependencyLabelPath);
                Scanner depLabelScanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(dependencyLabelFile), "UTF-8")));
                List<String> dependencyLabels = new ArrayList<String>();
                while (depLabelScanner.hasNextLine()) {
                    String dependencyLabel = depLabelScanner.nextLine();
                    if (!dependencyLabel.startsWith("#"))
                        dependencyLabels.add(dependencyLabel);
                }
                transitionSystem.setDependencyLabels(dependencyLabels);
            }

            TalismaneSession.setTransitionSystem(transitionSystem);
        }

        if (this.lexiconDirPath != null) {
            PosTaggerLexicon lexicon = null;
            LexiconChain lexiconChain = null;

            if (replaceLexicon) {
                lexiconChain = new LexiconChain();
            } else {
                lexicon = this.implementation.getDefaultLexicon();
                if (lexicon instanceof LexiconChain) {
                    lexiconChain = (LexiconChain) lexicon;
                } else {
                    lexiconChain = new LexiconChain();
                    lexiconChain.addLexicon(lexicon);
                }
            }
            File lexiconDir = new File(lexiconDirPath);
            LexiconDeserializer lexiconDeserializer = new LexiconDeserializer();
            List<PosTaggerLexicon> lexicons = lexiconDeserializer.deserializeLexicons(lexiconDir);
            for (PosTaggerLexicon oneLexicon : lexicons) {
                lexiconChain.addLexicon(oneLexicon);
            }

            lexicon = lexiconChain;

            TalismaneSession.setLexicon(lexicon);
        }

        if (externalResourcePath != null) {
            externalResourceFinder = this.getMachineLearningService().getExternalResourceFinder();
            File externalResourceFile = new File(externalResourcePath);
            externalResourceFinder.addExternalResources(externalResourceFile);

            ExternalResourceFinder parserResourceFinder = this.getParserFeatureService()
                    .getExternalResourceFinder();
            ExternalResourceFinder posTaggerResourceFinder = this.getPosTaggerFeatureService()
                    .getExternalResourceFinder();
            ExternalResourceFinder tokeniserResourceFinder = this.getTokenFeatureService()
                    .getExternalResourceFinder();
            ExternalResourceFinder sentenceResourceFinder = this.getSentenceDetectorFeatureService()
                    .getExternalResourceFinder();
            for (ExternalResource<?> externalResource : externalResourceFinder.getExternalResources()) {
                parserResourceFinder.addExternalResource(externalResource);
                posTaggerResourceFinder.addExternalResource(externalResource);
                tokeniserResourceFinder.addExternalResource(externalResource);
                sentenceResourceFinder.addExternalResource(externalResource);
            }
        }
    }

    /**
     * Returns the command that Talismane has been asked to run.
     *
     * @return the configured command
     */
    public Command getCommand() {
        return this.command;
    }

    /**
     * Sets the command that Talismane should run.
     *
     * @param command the command to store on this configuration
     */
    public void setCommand(Command command) {
        this.command = command;
    }

    /**
     * Returns the start module for commands that require one (e.g. analyse).
     * Default is {@link com.joliciel.talismane.Talismane.Module#SentenceDetector}.
     *
     * @return the module at which processing starts
     */
    public Module getStartModule() {
        return this.startModule;
    }

    /**
     * Sets the module at which processing starts.
     *
     * @param startModule the start module to store on this configuration
     */
    public void setStartModule(Module startModule) {
        this.startModule = startModule;
    }

    /**
     * Returns the end module for commands that require one (e.g. analyse).
     * Default is {@link com.joliciel.talismane.Talismane.Module#Parser}.
     *
     * @return the module at which processing ends
     */
    public Module getEndModule() {
        return this.endModule;
    }

    /**
     * Sets the module at which processing ends.
     *
     * @param endModule the end module to store on this configuration
     */
    public void setEndModule(Module endModule) {
        this.endModule = endModule;
    }

    /**
     * Returns the module for commands which only affect a single module (e.g. evaluate).
     *
     * @return the single module targeted by the command
     */
    public Module getModule() {
        return this.module;
    }

    /**
     * Sets the module used by commands which only affect a single module.
     *
     * @param module the module to store on this configuration
     */
    public void setModule(Module module) {
        this.module = module;
    }

    /**
     * Indicates whether, when analysing, raw text is processed by default, or whether
     * processing waits until a text marker filter says to start. Default is true.
     *
     * @return {@code true} if raw text is processed by default
     */
    public boolean isProcessByDefault() {
        return this.processByDefault;
    }

    /**
     * Sets whether raw text is processed by default when analysing.
     *
     * @param processByDefault the flag value to store on this configuration
     */
    public void setProcessByDefault(boolean processByDefault) {
        this.processByDefault = processByDefault;
    }

    /**
     * Returns the maximum number of sentences to process for the "process" command.
     * A value &lt;= 0 means all sentences are processed. Default is 0 (all).
     *
     * @return the maximum sentence count
     */
    public int getMaxSentenceCount() {
        return this.maxSentenceCount;
    }

    /**
     * Sets the maximum number of sentences to process.
     *
     * @param maxSentenceCount the maximum sentence count to store on this configuration
     */
    public void setMaxSentenceCount(int maxSentenceCount) {
        this.maxSentenceCount = maxSentenceCount;
    }

    /**
     * Returns the charset used to interpret the input stream.
     *
     * @return the input charset
     */
    public Charset getInputCharset() {
        return this.inputCharset;
    }

    /**
     * Sets the charset used to interpret the input stream.
     *
     * @param inputCharset the input charset to store on this configuration
     */
    public void setInputCharset(Charset inputCharset) {
        this.inputCharset = inputCharset;
    }

    /**
     * Returns the charset used to write to the output stream.
     *
     * @return the output charset
     */
    public Charset getOutputCharset() {
        return this.outputCharset;
    }

    /**
     * Sets the charset used to write to the output stream.
     *
     * @param outputCharset the output charset to store on this configuration
     */
    public void setOutputCharset(Charset outputCharset) {
        this.outputCharset = outputCharset;
    }

    /**
     * Returns the character (typically non-printing) which marks a stop in the input stream
     * and sets off analysis. The default value is the form-feed character (code=12).
     *
     * @return the end-of-block character
     */
    public char getEndBlockCharacter() {
        return this.endBlockCharacter;
    }

    /**
     * Sets the character which marks a stop in the input stream.
     *
     * @param endBlockCharacter the end-of-block character to store on this configuration
     */
    public void setEndBlockCharacter(char endBlockCharacter) {
        this.endBlockCharacter = endBlockCharacter;
    }

    /**
     * Returns the beam width for beam-search analysis. Default is 1.
     * Increasing this value will increase analysis time in a linear fashion,
     * but will typically improve results.
     *
     * @return the beam width
     */
    public int getBeamWidth() {
        return this.beamWidth;
    }

    /**
     * Sets the beam width for beam-search analysis.
     *
     * @param beamWidth the beam width to store on this configuration
     */
    public void setBeamWidth(int beamWidth) {
        this.beamWidth = beamWidth;
    }

    /**
     * Indicates how analyses are handed from one module to the next.
     * If true, the full beam of analyses produced by a given module is used as input for the
     * next module. If false, only the single best analysis is used.
     *
     * @return {@code true} if the full beam is propagated between modules
     */
    public boolean isPropagateBeam() {
        return this.propagateBeam;
    }

    /**
     * Sets whether the full beam of analyses is propagated from one module to the next.
     *
     * @param propagateBeam the flag value to store on this configuration
     */
    public void setPropagateBeam(boolean propagateBeam) {
        this.propagateBeam = propagateBeam;
    }

    /**
     * Indicates whether Talismane generates a very detailed analysis of how it obtained
     * the results it displays.
     *
     * @return {@code true} if detailed analysis output is requested
     */
    public boolean isIncludeDetails() {
        return this.includeDetails;
    }

    /**
     * Sets whether a detailed analysis of how results were obtained should be generated.
     *
     * @param includeDetails the flag value to store on this configuration
     */
    public void setIncludeDetails(boolean includeDetails) {
        this.includeDetails = includeDetails;
    }

    /**
     * Returns the reader used to read the data for this analysis, creating it on first use.
     * When an input file path is configured the reader is opened on that file; otherwise it
     * wraps {@code System.in}. In both cases the input charset is used for decoding.
     *
     * @return the lazily-created reader
     * @throws RuntimeException if the configured input file cannot be found
     */
    public Reader getReader() {
        // already created on an earlier call
        if (this.reader != null) {
            return this.reader;
        }

        // no input file configured: read from standard input
        if (inFilePath == null) {
            this.reader = new BufferedReader(new InputStreamReader(System.in, this.getInputCharset()));
            return this.reader;
        }

        try {
            FileInputStream inStream = new FileInputStream(new File(inFilePath));
            this.reader = new BufferedReader(new InputStreamReader(inStream, this.getInputCharset()));
        } catch (FileNotFoundException fnfe) {
            LogUtils.logError(LOG, fnfe);
            throw new RuntimeException(fnfe);
        }
        return this.reader;
    }

    /**
     * Returns the reader used to read the data for evaluation, when command=compare,
     * creating it on first use from the evaluation file path and the input charset.
     *
     * @return the lazily-created evaluation reader
     * @throws RuntimeException if the evaluation file cannot be found
     */
    public Reader getEvaluationReader() {
        // already created on an earlier call
        if (this.evaluationReader != null) {
            return this.evaluationReader;
        }

        try {
            FileInputStream evaluationStream = new FileInputStream(new File(evaluationFilePath));
            this.evaluationReader = new BufferedReader(
                    new InputStreamReader(evaluationStream, this.getInputCharset()));
        } catch (FileNotFoundException fnfe) {
            LogUtils.logError(LOG, fnfe);
            throw new RuntimeException(fnfe);
        }
        return this.evaluationReader;
    }

    /**
     * Returns the writer to which Talismane should write its output when analysing,
     * creating it on first use.
     * <p>
     * If an output file path is configured, any missing parent directories are created,
     * an existing file at that path is deleted, and a buffered writer on a fresh file is
     * returned. Otherwise the writer wraps {@code System.out}. In both cases text is encoded
     * with {@link #getOutputCharset()}.
     *
     * @return the lazily-created writer; the same instance on subsequent calls
     * @throws RuntimeException wrapping any exception raised while preparing the output
     */
    public Writer getWriter() {
        try {
            if (writer == null) {
                if (outFilePath != null) {
                    File outFile = new File(outFilePath);
                    // Let java.io.File work out the parent directory instead of searching
                    // for a literal "/", so paths written with the platform's own separator
                    // get their directory created as well.
                    File outFileDir = outFile.getParentFile();
                    if (outFileDir != null) {
                        outFileDir.mkdirs();
                    }
                    outFile.delete();
                    outFile.createNewFile();

                    writer = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(outFile), this.getOutputCharset()));
                } else {
                    writer = new BufferedWriter(new OutputStreamWriter(System.out, this.getOutputCharset()));
                }
            }
            return writer;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the filename to be applied to this analysis (if filename is included in the output).
     *
     * @return the file name associated with this analysis
     */
    public String getFileName() {
        return this.fileName;
    }

    /**
     * Returns the directory to which any output files are written, creating it if needed.
     * <p>
     * If an output directory path is configured, that directory is used. Otherwise, if an
     * output file path is configured and it has a parent directory, that parent is used.
     * In any other case the previously stored value is returned unchanged.
     *
     * @return the output directory, as last resolved by this method
     */
    public File getOutDir() {
        if (outDirPath != null) {
            outDir = new File(outDirPath);
            outDir.mkdirs();
        } else if (outFilePath != null) {
            // Let java.io.File work out the parent directory instead of searching for a
            // literal "/", so paths written with the platform's own separator work too.
            File outFileDir = new File(outFilePath).getParentFile();
            if (outFileDir != null) {
                outDir = outFileDir;
                outDir.mkdirs();
            }
        }
        return outDir;
    }

    /**
     * Returns the rules to apply when running the pos-tagger, loading them on first use.
     * <p>
     * Two sources are read in order: the implementation's default rules (skipped when
     * {@code posTaggerRulesReplace} is set), then the rules file at
     * {@code posTaggerRuleFilePath} if one is configured. Every non-empty line is treated as a
     * rule descriptor. Each scanner is closed once its lines have been read.
     *
     * @return the cached list of pos-tagger rules
     * @throws RuntimeException wrapping any exception raised while loading the rules
     */
    public List<PosTaggerRule> getPosTaggerRules() {
        try {
            if (posTaggerRules == null) {
                posTaggerRules = new ArrayList<PosTaggerRule>();
                for (int i = 0; i <= 1; i++) {
                    Scanner rulesScanner = null;
                    if (i == 0) {
                        if (posTaggerRulesReplace)
                            continue;
                        rulesScanner = this.implementation.getDefaultPosTaggerRulesScanner();
                    } else {
                        if (posTaggerRuleFilePath != null && posTaggerRuleFilePath.length() > 0) {
                            File posTaggerRuleFile = new File(posTaggerRuleFilePath);
                            rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                    new FileInputStream(posTaggerRuleFile), this.getInputCharset().name())));
                        }
                    }

                    if (rulesScanner != null) {
                        List<String> ruleDescriptors = new ArrayList<String>();
                        try {
                            while (rulesScanner.hasNextLine()) {
                                String ruleDescriptor = rulesScanner.nextLine();
                                if (ruleDescriptor.length() > 0) {
                                    ruleDescriptors.add(ruleDescriptor);
                                    LOG.trace(ruleDescriptor);
                                }
                            }
                        } finally {
                            // release the underlying file/stream handle
                            rulesScanner.close();
                        }
                        List<PosTaggerRule> rules = this.getPosTaggerFeatureService().getRules(ruleDescriptors);
                        posTaggerRules.addAll(rules);
                    }
                }
            }
            return posTaggerRules;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the rules to apply when running the parser, loading them on first use.
     * <p>
     * If {@code parserRuleFilePath} is the literal string "null" (case-insensitive), no rules
     * at all are loaded. Otherwise two sources are read in order: the implementation's default
     * rules (skipped when {@code parserRulesReplace} is set), then the rules file at
     * {@code parserRuleFilePath} if one is configured. Every non-empty line is treated as a
     * rule descriptor. Each scanner is closed once its lines have been read.
     *
     * @return the cached list of parser rules
     * @throws RuntimeException wrapping any exception raised while loading the rules
     */
    public List<ParserRule> getParserRules() {
        try {
            if (parserRules == null) {
                parserRules = new ArrayList<ParserRule>();
                if (parserRuleFilePath != null && parserRuleFilePath.equalsIgnoreCase("null")) {
                    // add no rules! (not even built-in ones)
                } else {
                    for (int i = 0; i <= 1; i++) {
                        Scanner rulesScanner = null;
                        if (i == 0) {
                            if (parserRulesReplace)
                                continue;
                            rulesScanner = this.implementation.getDefaultParserRulesScanner();
                        } else {
                            if (parserRuleFilePath != null && parserRuleFilePath.length() > 0) {
                                File parserRuleFile = new File(parserRuleFilePath);
                                rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                        new FileInputStream(parserRuleFile), this.getInputCharset().name())));
                            }
                        }

                        if (rulesScanner != null) {
                            List<String> ruleDescriptors = new ArrayList<String>();
                            try {
                                while (rulesScanner.hasNextLine()) {
                                    String ruleDescriptor = rulesScanner.nextLine();
                                    if (ruleDescriptor.length() > 0) {
                                        ruleDescriptors.add(ruleDescriptor);
                                        LOG.trace(ruleDescriptor);
                                    }
                                }
                            } finally {
                                // release the underlying file/stream handle
                                rulesScanner.close();
                            }
                            List<ParserRule> rules = this.getParserFeatureService().getRules(ruleDescriptors,
                                    dynamiseFeatures);
                            parserRules.addAll(rules);
                        }
                    }
                }
            }
            return parserRules;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the regex used to process the input, when pre-annotated.
     * If no regex is stored yet and an input pattern file is configured, the first line
     * of that file is read and cached as the regex.
     *
     * @return the input regex, or {@code null} if none is set and no pattern file is configured
     * @throws RuntimeException wrapping any failure, including a pattern file with no lines
     */
    public String getInputRegex() {
        try {
            boolean hasPatternFile = inputPatternFilePath != null && inputPatternFilePath.length() > 0;
            if (inputRegex != null || !hasPatternFile) {
                // nothing to load: either already known, or no file to load it from
                return inputRegex;
            }

            Scanner patternScanner = new Scanner(new File(inputPatternFilePath));
            if (patternScanner.hasNextLine()) {
                inputRegex = patternScanner.nextLine();
            }
            patternScanner.close();

            if (inputRegex == null) {
                throw new TalismaneException("No input pattern found in " + inputPatternFilePath);
            }
            return inputRegex;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the regex used to process the evaluation corpus.
     * If no regex is stored yet, it is read from the first line of the evaluation pattern
     * file when one is configured; otherwise it falls back to {@link #getInputRegex()}.
     *
     * @return the evaluation regex, possibly {@code null} if the input regex fallback is null
     * @throws RuntimeException wrapping any failure, including a pattern file with no lines
     */
    public String getEvaluationRegex() {
        try {
            if (evaluationRegex != null) {
                return evaluationRegex;
            }

            boolean noPatternFile = evaluationPatternFilePath == null
                    || evaluationPatternFilePath.length() == 0;
            if (noPatternFile) {
                // no dedicated pattern: reuse the input regex
                evaluationRegex = this.getInputRegex();
                return evaluationRegex;
            }

            Scanner patternScanner = new Scanner(new File(evaluationPatternFilePath));
            if (patternScanner.hasNextLine()) {
                evaluationRegex = patternScanner.nextLine();
            }
            patternScanner.close();

            if (evaluationRegex == null) {
                throw new TalismaneException("No evaluation pattern found in " + evaluationPatternFilePath);
            }
            return evaluationRegex;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the text marker filters, building the list on first use.
     * <p>
     * Text marker filters are applied to raw text segments extracted from the stream,
     * 3 segments at a time. This means that if a particular marker crosses segment borders,
     * it is handled correctly.
     * <p>
     * The list is built in this order: a sentence-break filter on the end-of-block character,
     * the newline filter selected by {@code newlineMarker} (if any), the duplicate white-space
     * filter, then filters described in the file at {@code textFiltersPath} (if configured),
     * and finally the implementation's default filters. Empty lines and lines starting with
     * "#" are ignored. Each scanner is closed once its lines have been read.
     *
     * @return the cached list of text marker filters
     * @throws RuntimeException wrapping any exception raised while building the list
     */
    public List<TextMarkerFilter> getTextMarkerFilters() {
        try {
            if (textMarkerFilters == null) {
                // must be assigned before any addTextMarkerFilter call, which writes to the field
                textMarkerFilters = new ArrayList<TextMarkerFilter>();

                // insert sentence breaks at end of block
                this.addTextMarkerFilter(this.getFilterService().getRegexMarkerFilter(
                        new MarkerFilterType[] { MarkerFilterType.SENTENCE_BREAK }, "" + endBlockCharacter,
                        blockSize));

                // handle newline as requested
                if (newlineMarker.equals(MarkerFilterType.SENTENCE_BREAK))
                    this.addTextMarkerFilter(this.getFilterService().getNewlineEndOfSentenceMarker());
                else if (newlineMarker.equals(MarkerFilterType.SPACE))
                    this.addTextMarkerFilter(this.getFilterService().getNewlineSpaceMarker());

                // get rid of duplicate white-space always
                this.addTextMarkerFilter(this.getFilterService().getDuplicateWhiteSpaceFilter());

                for (int i = 0; i <= 1; i++) {
                    LOG.debug("Text marker filters");
                    Scanner textFilterScanner = null;
                    if (i == 0) {
                        if (textFiltersPath != null && textFiltersPath.length() > 0) {
                            LOG.debug("From: " + textFiltersPath);
                            File textFilterFile = new File(textFiltersPath);
                            textFilterScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                    new FileInputStream(textFilterFile), this.getInputCharset().name())));
                        }
                    } else {
                        LOG.debug("From default");
                        textFilterScanner = this.implementation.getDefaultTextMarkerFiltersScanner();
                    }
                    if (textFilterScanner != null) {
                        try {
                            while (textFilterScanner.hasNextLine()) {
                                String descriptor = textFilterScanner.nextLine();
                                LOG.debug(descriptor);
                                if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                    TextMarkerFilter textMarkerFilter = this.getFilterService()
                                            .getTextMarkerFilter(descriptor, blockSize);
                                    this.addTextMarkerFilter(textMarkerFilter);
                                }
                            }
                        } finally {
                            // release the underlying file/stream handle
                            textFilterScanner.close();
                        }
                    }
                }

            }
            return textMarkerFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the list of text marker filters held by this configuration.
     *
     * @param textMarkerFilters the list to store; it is kept by reference, not copied
     */
    public void setTextMarkerFilters(List<TextMarkerFilter> textMarkerFilters) {
        this.textMarkerFilters = textMarkerFilters;
    }

    /**
     * Appends a filter to the list of text marker filters.
     * <p>
     * The list field is dereferenced directly and is not created here, so it must already be
     * non-null when this method is called.
     * NOTE(review): whether the field can still be null at this point depends on its
     * initialiser and on call order, neither of which is visible here; confirm.
     *
     * @param textMarkerFilter the filter to append
     */
    public void addTextMarkerFilter(TextMarkerFilter textMarkerFilter) {
        this.textMarkerFilters.add(textMarkerFilter);
    }

    /**
     * Returns the token sequence filters to be applied during analysis, building the list
     * on first use.
     * <p>
     * Filters described in the file at {@code tokenSequenceFilterPath} (if configured) come
     * first, followed by the implementation's default token sequence filters. Every line of
     * the file is recorded as a descriptor, but only non-empty lines not starting with "#"
     * produce a filter. The recorded descriptors are stored in {@link #getDescriptors()}.
     * The file scanner is closed once its lines have been read.
     *
     * @return the cached list of token sequence filters
     * @throws RuntimeException wrapping any exception raised while building the list
     */
    public List<TokenSequenceFilter> getTokenSequenceFilters() {
        try {
            if (tokenSequenceFilters == null) {
                List<String> tokenSequenceFilterDescriptors = new ArrayList<String>();
                tokenSequenceFilters = new ArrayList<TokenSequenceFilter>();

                if (tokenSequenceFilterPath != null) {
                    File tokenSequenceFilterFile = new File(tokenSequenceFilterPath);
                    Scanner scanner = new Scanner(tokenSequenceFilterFile);

                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            LOG.debug(descriptor);
                            tokenSequenceFilterDescriptors.add(descriptor);
                            if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                TokenSequenceFilter tokenSequenceFilter = this.getTokenFilterService()
                                        .getTokenSequenceFilter(descriptor);
                                tokenSequenceFilters.add(tokenSequenceFilter);
                            }
                        }
                    } finally {
                        // release the file handle held by the scanner
                        scanner.close();
                    }
                }

                tokenSequenceFilters.addAll(this.implementation.getDefaultTokenSequenceFilters());

                this.getDescriptors().put(TokenFilterService.TOKEN_SEQUENCE_FILTER_DESCRIPTOR_KEY,
                        tokenSequenceFilterDescriptors);
            }
            return tokenSequenceFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the pos-tag sequence filters used for pos-tagger post-processing, building the
     * list on first use.
     * <p>
     * Filters are read from the file at {@code posTagSequenceFilterPath} if one is configured.
     * Every line of the file is recorded as a descriptor, but only non-empty lines not
     * starting with "#" produce a filter. The recorded descriptors are stored in
     * {@link #getDescriptors()}. The file scanner is closed once its lines have been read.
     *
     * @return the cached list of pos-tag sequence filters (empty if no file is configured)
     * @throws RuntimeException wrapping any exception raised while building the list
     */
    public List<PosTagSequenceFilter> getDefaultPosTagSequenceFilters() {
        try {
            if (posTaggerPostProcessingFilters == null) {
                List<String> posTaggerPostProcessingFilterDescriptors = new ArrayList<String>();
                posTaggerPostProcessingFilters = new ArrayList<PosTagSequenceFilter>();

                if (posTagSequenceFilterPath != null) {
                    File filterFile = new File(posTagSequenceFilterPath);
                    Scanner scanner = new Scanner(filterFile);

                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            LOG.debug(descriptor);
                            posTaggerPostProcessingFilterDescriptors.add(descriptor);
                            if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                PosTagSequenceFilter filter = this.getPosTagFilterService()
                                        .getPosTagSequenceFilter(descriptor);
                                posTaggerPostProcessingFilters.add(filter);
                            }
                        }
                    } finally {
                        // release the file handle held by the scanner
                        scanner.close();
                    }
                }

                this.getDescriptors().put(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY,
                        posTaggerPostProcessingFilterDescriptors);

            }
            return posTaggerPostProcessingFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the token filters to be applied during analysis, building the list on first use.
     * <p>
     * Two sources are read in order: the file at {@code tokenFiltersPath} (if configured),
     * then the implementation's default token filters. Every line read is recorded as a
     * descriptor, but only non-empty lines not starting with "#" produce a filter. The
     * recorded descriptors are stored in {@link #getDescriptors()}. Each scanner is closed
     * once its lines have been read.
     *
     * @return the cached list of token filters
     * @throws RuntimeException wrapping any exception raised while building the list
     */
    public List<TokenFilter> getTokenFilters() {
        try {
            if (tokenFilters == null) {
                List<String> tokenFilterDescriptors = new ArrayList<String>();
                tokenFilters = new ArrayList<TokenFilter>();
                for (int i = 0; i <= 1; i++) {
                    LOG.debug("Token filters");
                    Scanner tokenFilterScanner = null;
                    if (i == 0) {
                        if (tokenFiltersPath != null && tokenFiltersPath.length() > 0) {
                            LOG.debug("From: " + tokenFiltersPath);
                            File tokenFilterFile = new File(tokenFiltersPath);
                            tokenFilterScanner = new Scanner(tokenFilterFile);
                        }
                    } else {
                        LOG.debug("From default");
                        tokenFilterScanner = this.implementation.getDefaultTokenFiltersScanner();
                    }
                    if (tokenFilterScanner != null) {
                        try {
                            while (tokenFilterScanner.hasNextLine()) {
                                String descriptor = tokenFilterScanner.nextLine();
                                LOG.debug(descriptor);
                                tokenFilterDescriptors.add(descriptor);
                                if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                    TokenFilter tokenFilter = this.getTokenFilterService().getTokenFilter(descriptor);
                                    tokenFilters.add(tokenFilter);
                                }
                            }
                        } finally {
                            // release the underlying file/stream handle
                            tokenFilterScanner.close();
                        }
                    }
                }
                this.getDescriptors().put(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY, tokenFilterDescriptors);

            }
            return tokenFilters;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the list of token filters held by this configuration.
     *
     * @param tokenFilters the list to store; it is kept by reference, not copied
     */
    public void setTokenFilters(List<TokenFilter> tokenFilters) {
        this.tokenFilters = tokenFilters;
    }

    /**
     * Returns the sentence detector to use for analysis, creating it on first use.
     * The underlying classification model is read from the file at
     * {@code sentenceModelFilePath} when one is configured, and from the implementation's
     * default sentence model stream otherwise.
     *
     * @return the cached sentence detector
     * @throws RuntimeException wrapping any exception raised while loading the model
     */
    public SentenceDetector getSentenceDetector() {
        try {
            if (sentenceDetector != null) {
                return sentenceDetector;
            }

            LOG.debug("Getting sentence detector model");
            ClassificationModel<SentenceDetectorOutcome> model;
            if (sentenceModelFilePath == null) {
                // no explicit model file: fall back to the implementation's default model
                model = this.getMachineLearningService()
                        .getClassificationModel(this.implementation.getDefaultSentenceModelStream());
            } else {
                model = this.getMachineLearningService()
                        .getClassificationModel(new ZipInputStream(new FileInputStream(sentenceModelFilePath)));
            }
            sentenceDetector = this.getSentenceDetectorService().getSentenceDetector(model);
            return sentenceDetector;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the tokeniser to use for analysis, building it on the first call.
     * Construction attaches, in this order: an optional detailed-analysis observer,
     * the configured token filters, the token filters described in the tokeniser model,
     * the configured token sequence filters, and the token sequence filters described
     * in the tokeniser model. Token filters are also added to the sentence detector
     * whenever <code>needsSentenceDetector()</code> is true.
     * @return the cached tokeniser
     * @throws RuntimeException wrapping any exception raised while building the tokeniser
     */
    public Tokeniser getTokeniser() {
        try {
            if (tokeniser != null) {
                return tokeniser;
            }

            LOG.debug("Getting tokeniser model");
            ClassificationModel<TokeniserOutcome> model = this.getTokeniserModel();
            tokeniser = this.getTokeniserPatternService().getPatternTokeniser(model, tokeniserBeamWidth);

            if (includeDetails) {
                String detailsFileName = this.getBaseName() + "_tokeniser_details.txt";
                File detailsFile = new File(this.getOutDir(), detailsFileName);
                // remove any details file left from a previous run
                detailsFile.delete();
                ClassificationObserver<TokeniserOutcome> detailsObserver = model
                        .getDetailedAnalysisObserver(detailsFile);
                tokeniser.addObserver(detailsObserver);
            }

            // token filters configured directly on this object
            for (TokenFilter configuredFilter : this.getTokenFilters()) {
                tokeniser.addTokenFilter(configuredFilter);
                if (this.needsSentenceDetector()) {
                    this.getSentenceDetector().addTokenFilter(configuredFilter);
                }
            }

            // token filters described inside the model; empty lines and "#" lines are skipped
            List<String> modelTokenFilterDescriptors = model.getDescriptors()
                    .get(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY);
            if (modelTokenFilterDescriptors != null) {
                for (String descriptor : modelTokenFilterDescriptors) {
                    if (descriptor.length() == 0 || descriptor.startsWith("#")) {
                        continue;
                    }
                    TokenFilter modelFilter = this.getTokenFilterService().getTokenFilter(descriptor);
                    tokeniser.addTokenFilter(modelFilter);
                    if (this.needsSentenceDetector()) {
                        this.getSentenceDetector().addTokenFilter(modelFilter);
                    }
                }
            }

            // token sequence filters configured directly on this object
            for (TokenSequenceFilter configuredSequenceFilter : this.getTokenSequenceFilters()) {
                tokeniser.addTokenSequenceFilter(configuredSequenceFilter);
            }

            // token sequence filters described inside the model
            List<String> modelSequenceFilterDescriptors = model.getDescriptors()
                    .get(TokenFilterService.TOKEN_SEQUENCE_FILTER_DESCRIPTOR_KEY);
            if (modelSequenceFilterDescriptors != null) {
                for (String descriptor : modelSequenceFilterDescriptors) {
                    if (descriptor.length() == 0 || descriptor.startsWith("#")) {
                        continue;
                    }
                    TokenSequenceFilter modelSequenceFilter = this.getTokenFilterService()
                            .getTokenSequenceFilter(descriptor);
                    tokeniser.addTokenSequenceFilter(modelSequenceFilter);
                }
            }

            return tokeniser;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the tokeniser model, loading it on the first call: from the file at
     * <code>tokeniserModelFilePath</code> when set, otherwise from the implementation's
     * default tokeniser model stream.
     * @return the cached tokeniser model
     * @throws RuntimeException wrapping any exception raised while loading the model
     */
    ClassificationModel<TokeniserOutcome> getTokeniserModel() {
        try {
            if (tokeniserModel != null) {
                return tokeniserModel;
            }

            if (tokeniserModelFilePath == null) {
                tokeniserModel = this.getMachineLearningService()
                        .getClassificationModel(this.implementation.getDefaultTokeniserModelStream());
            } else {
                tokeniserModel = this.getMachineLearningService().getClassificationModel(
                        new ZipInputStream(new FileInputStream(tokeniserModelFilePath)));
            }
            return tokeniserModel;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the pos-tagger model, loading it on the first call: from the file at
     * <code>posTaggerModelFilePath</code> when set, otherwise from the implementation's
     * default pos-tagger model stream.
     * @return the cached pos-tagger model
     * @throws RuntimeException wrapping any exception raised while loading the model
     */
    ClassificationModel<PosTag> getPosTaggerModel() {
        try {
            if (posTaggerModel != null) {
                return posTaggerModel;
            }

            if (posTaggerModelFilePath == null) {
                posTaggerModel = this.getMachineLearningService()
                        .getClassificationModel(this.implementation.getDefaultPosTaggerModelStream());
            } else {
                posTaggerModel = this.getMachineLearningService().getClassificationModel(
                        new ZipInputStream(new FileInputStream(posTaggerModelFilePath)));
            }
            return posTaggerModel;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the parser model, loading it on the first call: from the file at
     * <code>parserModelFilePath</code> when set, otherwise from the implementation's
     * default parser model stream.
     * @return the cached parser model
     * @throws RuntimeException wrapping any exception raised while loading the model
     */
    MachineLearningModel getParserModel() {
        try {
            if (parserModel != null) {
                return parserModel;
            }

            if (parserModelFilePath == null) {
                parserModel = this.getMachineLearningService()
                        .getMachineLearningModel(this.implementation.getDefaultParserModelStream());
            } else {
                parserModel = this.getMachineLearningService()
                        .getMachineLearningModel(new ZipInputStream(new FileInputStream(parserModelFilePath)));
            }
            return parserModel;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the tokeniser pattern manager, building it on the first call from the
     * pattern descriptors found (one per line) in the file at
     * <code>tokeniserPatternFilePath</code>, read with the input charset.
     * @return the cached tokeniser pattern manager
     * @throws RuntimeException if the pattern file path is missing or empty, or wrapping
     * any exception raised while reading the file or building the manager
     */
    private TokeniserPatternManager getTokeniserPatternManager() {
        if (tokeniserPatternManager == null) {
            // a null path is reported the same way as an empty one, rather than as an NPE
            if (tokeniserPatternFilePath == null || tokeniserPatternFilePath.length() == 0)
                throw new RuntimeException("Missing argument: tokeniserPatterns");
            try {
                File tokeniserPatternFile = new File(tokeniserPatternFilePath);
                Scanner scanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(tokeniserPatternFile), this.getInputCharset())));
                List<String> patternDescriptors = new ArrayList<String>();
                try {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        patternDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                } finally {
                    // release the file handle even when reading fails
                    scanner.close();
                }

                tokeniserPatternManager = this.getTokeniserPatternService().getPatternManager(patternDescriptors);
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return tokeniserPatternManager;
    }

    /**
     * Returns the sentence detector features, loading them on the first call from the
     * feature descriptors found (one per line) in the file at
     * <code>sentenceFeaturePath</code>. The descriptors read are also stored in this
     * configuration's descriptor map under
     * <code>MachineLearningModel.FEATURE_DESCRIPTOR_KEY</code>.
     * @return the feature set, or null if none was set and <code>sentenceFeaturePath</code> is null
     * @throws RuntimeException wrapping any exception raised while reading or parsing the descriptors
     */
    public Set<SentenceDetectorFeature<?>> getSentenceDetectorFeatures() {
        if (sentenceFeatures == null) {
            try {
                if (sentenceFeaturePath != null) {
                    LOG.debug("Found setting to change sentence detector features");
                    File sentenceFeatureFile = new File(sentenceFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(sentenceFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayList<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner owns the file stream: close it so the handle is not leaked
                        scanner.close();
                    }
                    sentenceFeatures = this.getSentenceDetectorFeatureService().getFeatureSet(featureDescriptors);
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return sentenceFeatures;
    }

    /**
     * Returns the tokeniser context features, loading them on the first call from the
     * feature descriptors found (one per line) in the file at
     * <code>tokeniserFeaturePath</code>, combined with the parsed test patterns of the
     * tokeniser pattern manager. The descriptors read are also stored in this
     * configuration's descriptor map under
     * <code>MachineLearningModel.FEATURE_DESCRIPTOR_KEY</code>.
     * @return the feature set, or null if none was set and <code>tokeniserFeaturePath</code> is null
     * @throws RuntimeException wrapping any exception raised while reading or parsing the descriptors
     */
    public Set<TokeniserContextFeature<?>> getTokeniserContextFeatures() {
        if (tokeniserContextFeatures == null) {
            try {
                if (tokeniserFeaturePath != null) {
                    TokeniserPatternManager tokeniserPatternManager = this.getTokeniserPatternManager();
                    LOG.debug("Found setting to change tokeniser context features");
                    File tokeniserFeatureFile = new File(tokeniserFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayList<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner owns the file stream: close it so the handle is not leaked
                        scanner.close();
                    }
                    tokeniserContextFeatures = this.getTokenFeatureService().getTokeniserContextFeatureSet(
                            featureDescriptors, tokeniserPatternManager.getParsedTestPatterns());
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return tokeniserContextFeatures;
    }

    /**
     * Returns the token pattern match features, loading them on the first call from the
     * feature descriptors found (one per line) in the file at
     * <code>tokeniserFeaturePath</code>. The descriptors read are also stored in this
     * configuration's descriptor map under
     * <code>MachineLearningModel.FEATURE_DESCRIPTOR_KEY</code>.
     * @return the feature set, or null if none was set and <code>tokeniserFeaturePath</code> is null
     * @throws RuntimeException wrapping any exception raised while reading or parsing the descriptors
     */
    public Set<TokenPatternMatchFeature<?>> getTokenPatternMatchFeatures() {
        if (tokenPatternMatchFeatures == null) {
            try {
                if (tokeniserFeaturePath != null) {
                    LOG.debug("Found setting to change token pattern match features");
                    File tokeniserFeatureFile = new File(tokeniserFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayList<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner owns the file stream: close it so the handle is not leaked
                        scanner.close();
                    }
                    tokenPatternMatchFeatures = this.getTokenFeatureService()
                            .getTokenPatternMatchFeatureSet(featureDescriptors);
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return tokenPatternMatchFeatures;
    }

    /**
     * Returns the pos-tagger features, loading them on the first call from the
     * feature descriptors found (one per line) in the file at
     * <code>posTaggerFeaturePath</code>. The descriptors read are also stored in this
     * configuration's descriptor map under
     * <code>MachineLearningModel.FEATURE_DESCRIPTOR_KEY</code>.
     * @return the feature set, or null if none was set and <code>posTaggerFeaturePath</code> is null
     * @throws RuntimeException wrapping any exception raised while reading or parsing the descriptors
     */
    public Set<PosTaggerFeature<?>> getPosTaggerFeatures() {
        if (posTaggerFeatures == null) {
            try {
                if (posTaggerFeaturePath != null) {
                    LOG.debug("Found setting to change pos-tagger features");
                    File posTaggerFeatureFile = new File(posTaggerFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                            new FileInputStream(posTaggerFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayList<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner owns the file stream: close it so the handle is not leaked
                        scanner.close();
                    }
                    posTaggerFeatures = this.getPosTaggerFeatureService().getFeatureSet(featureDescriptors);
                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return posTaggerFeatures;
    }

    /**
     * Returns the classification event stream for the current module, building it on
     * the first call from that module's corpus reader and features. For the tokeniser,
     * the stream type depends on <code>patternTokeniserType</code>: an interval pattern
     * stream for <code>Interval</code>, a compound pattern stream otherwise.
     * @return the cached classification event stream
     * @throws TalismaneException if the current module is not one of the supported modules
     */
    public ClassificationEventStream getClassificationEventStream() {
        if (this.classificationEventStream != null) {
            return classificationEventStream;
        }

        switch (this.getModule()) {
        case SentenceDetector:
            classificationEventStream = this.getSentenceDetectorService().getSentenceDetectorEventStream(
                    this.getSentenceCorpusReader(), this.getSentenceDetectorFeatures());
            break;
        case Tokeniser:
            if (patternTokeniserType != PatternTokeniserType.Interval) {
                Set<TokenPatternMatchFeature<?>> patternMatchFeatures = this.getTokenPatternMatchFeatures();
                classificationEventStream = this.getTokeniserPatternService().getCompoundPatternEventStream(
                        this.getTokenCorpusReader(), patternMatchFeatures, this.getTokeniserPatternManager());
            } else {
                Set<TokeniserContextFeature<?>> contextFeatures = this.getTokeniserContextFeatures();
                classificationEventStream = this.getTokeniserPatternService().getIntervalPatternEventStream(
                        this.getTokenCorpusReader(), contextFeatures, this.getTokeniserPatternManager());
            }
            break;
        case PosTagger:
            classificationEventStream = this.getPosTaggerService()
                    .getPosTagEventStream(this.getPosTagCorpusReader(), this.getPosTaggerFeatures());
            break;
        case Parser:
            classificationEventStream = this.getParserService()
                    .getParseEventStream(this.getParserCorpusReader(), this.getParserFeatures());
            break;
        default:
            throw new TalismaneException("Unsupported module: " + this.getModule());
        }
        return classificationEventStream;
    }

    /**
     * Returns the pos-tagger to use for analysis, building it on the first call.
     * Construction applies, in this order: the features from
     * <code>posTaggerFeaturePath</code> (when set), the pre-processing filters described
     * in the model, the configured token sequence filters, the post-processing filters
     * described in the model, the default pos-tag sequence filters, the pos-tagger
     * rules, and an optional detailed-analysis observer.
     * @return the cached pos-tagger
     * @throws RuntimeException wrapping any exception raised while building the pos-tagger
     */
    public PosTagger getPosTagger() {
        try {
            if (posTagger != null) {
                return posTagger;
            }

            LOG.debug("Getting pos-tagger model");

            ClassificationModel<PosTag> model = this.getPosTaggerModel();
            posTagger = this.getPosTaggerService().getPosTagger(model, posTaggerBeamWidth);

            if (posTaggerFeaturePath != null) {
                Set<PosTaggerFeature<?>> overridingFeatures = this.getPosTaggerFeatures();
                posTagger.setPosTaggerFeatures(overridingFeatures);
            }

            // pre-processing filters described inside the model; empty lines and "#" lines are skipped
            List<String> preProcessingDescriptors = model.getDescriptors()
                    .get(PosTagFilterService.POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY);
            if (preProcessingDescriptors != null) {
                for (String descriptor : preProcessingDescriptors) {
                    if (descriptor.length() == 0 || descriptor.startsWith("#")) {
                        continue;
                    }
                    TokenSequenceFilter modelPreFilter = this.getTokenFilterService()
                            .getTokenSequenceFilter(descriptor);
                    posTagger.addPreProcessingFilter(modelPreFilter);
                }
            }

            // token sequence filters configured directly on this object
            for (TokenSequenceFilter configuredPreFilter : this.getTokenSequenceFilters()) {
                posTagger.addPreProcessingFilter(configuredPreFilter);
            }

            // post-processing filters described inside the model
            List<String> postProcessingDescriptors = model.getDescriptors()
                    .get(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY);
            if (postProcessingDescriptors != null) {
                for (String descriptor : postProcessingDescriptors) {
                    if (descriptor.length() == 0 || descriptor.startsWith("#")) {
                        continue;
                    }
                    PosTagSequenceFilter modelPostFilter = this.getPosTagFilterService()
                            .getPosTagSequenceFilter(descriptor);
                    posTagger.addPostProcessingFilter(modelPostFilter);
                }
            }

            for (PosTagSequenceFilter defaultPostFilter : this.getDefaultPosTagSequenceFilters()) {
                posTagger.addPostProcessingFilter(defaultPostFilter);
            }

            posTagger.setPosTaggerRules(this.getPosTaggerRules());

            if (includeDetails) {
                String detailsFileName = this.getBaseName() + "_posTagger_details.txt";
                File detailsFile = new File(this.getOutDir(), detailsFileName);
                // remove any details file left from a previous run
                detailsFile.delete();
                ClassificationObserver<PosTag> detailsObserver = model.getDetailedAnalysisObserver(detailsFile);
                posTagger.addObserver(detailsObserver);
            }

            return posTagger;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the parser to use for analysis, building it on the first call.
     * Construction creates a transition-based parser from the parser model, then applies
     * the maximum analysis time, the minimum free memory, the features from
     * <code>parserFeaturePath</code> (when set), the parser rules, the early-stop flag,
     * the parse comparison strategy (when a strategy type is set) and an optional
     * detailed-analysis observer. Finally, the parser's transition system is registered
     * on the <code>TalismaneSession</code>.
     * @return the cached parser
     * @throws RuntimeException wrapping any exception raised while building the parser
     */
    public Parser getParser() {
        try {
            if (parser == null) {
                LOG.debug("Getting parser model");
                MachineLearningModel parserModel = this.getParserModel();

                parser = this.getParserService().getTransitionBasedParser(parserModel, parserBeamWidth,
                        dynamiseFeatures);
                parser.setMaxAnalysisTimePerSentence(maxParseAnalysisTime);
                parser.setMinFreeMemory(minFreeMemory);

                if (this.parserFeaturePath != null) {
                    Set<ParseConfigurationFeature<?>> parserFeatures = this.getParserFeatures();
                    parser.setParseFeatures(parserFeatures);
                }

                parser.setParserRules(this.getParserRules());

                if (parser instanceof TransitionBasedParser) {
                    TransitionBasedParser transitionBasedParser = (TransitionBasedParser) parser;
                    transitionBasedParser.setEarlyStop(earlyStop);
                }

                if (parseComparisonStrategyType != null) {
                    // go through the service getter, as everywhere else in this method,
                    // rather than reading the parserService field directly
                    ParseComparisonStrategy parseComparisonStrategy = this.getParserService()
                            .getParseComparisonStrategy(parseComparisonStrategyType);
                    parser.setParseComparisonStrategy(parseComparisonStrategy);
                }

                // detailed analysis is only available when the model is a classification model
                if (includeDetails && parserModel instanceof ClassificationModel) {
                    String detailsFilePath = this.getBaseName() + "_parser_details.txt";
                    File detailsFile = new File(this.getOutDir(), detailsFilePath);
                    // remove any details file left from a previous run
                    detailsFile.delete();
                    @SuppressWarnings("unchecked")
                    ClassificationModel<Transition> classificationModel = (ClassificationModel<Transition>) parserModel;
                    ClassificationObserver<Transition> observer = classificationModel
                            .getDetailedAnalysisObserver(detailsFile);
                    parser.addObserver(observer);
                }
                TalismaneSession.setTransitionSystem(parser.getTransitionSystem());

            }
            return parser;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the parser features, loading them on the first call from the
     * feature descriptors found (one per line) in the file at
     * <code>parserFeaturePath</code>. The descriptors read are also stored in this
     * configuration's descriptor map under
     * <code>MachineLearningModel.FEATURE_DESCRIPTOR_KEY</code>.
     * @return the feature set, or null if none was set and <code>parserFeaturePath</code> is null
     * @throws RuntimeException wrapping any exception raised while reading or parsing the descriptors
     */
    public Set<ParseConfigurationFeature<?>> getParserFeatures() {
        if (parserFeatures == null) {
            try {
                if (parserFeaturePath != null) {
                    LOG.debug("Found setting to change parser features");
                    File parserFeatureFile = new File(parserFeaturePath);
                    Scanner scanner = new Scanner(new BufferedReader(
                            new InputStreamReader(new FileInputStream(parserFeatureFile), this.getInputCharset())));
                    List<String> featureDescriptors = new ArrayList<String>();
                    try {
                        while (scanner.hasNextLine()) {
                            String descriptor = scanner.nextLine();
                            featureDescriptors.add(descriptor);
                            LOG.debug(descriptor);
                        }
                    } finally {
                        // the scanner owns the file stream: close it so the handle is not leaked
                        scanner.close();
                    }
                    parserFeatures = this.getParserFeatureService().getFeatures(featureDescriptors);

                    this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
                }
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
                throw new RuntimeException(e);
            }
        }
        return parserFeatures;
    }

    /**
     * The maximum time, in seconds, that the parser will spend analysing a single sentence.
     * When this time is exceeded, the parser returns a partial analysis, or "dependency forest",
     * in which certain nodes are left unattached (no governor).<br/>
     * A value of 0 means there is no maximum time:
     * the parser always continues until sentence analysis is complete.<br/>
     * The default value is 60.<br/>
     * @return the maximum parse analysis time per sentence, in seconds
     */
    public int getMaxParseAnalysisTime() {
        return this.maxParseAnalysisTime;
    }

    /**
     * Sets the maximum time the parser may spend on a single sentence.
     * @param maxParseAnalysisTime the new value, stored as given
     */
    public void setMaxParseAnalysisTime(final int maxParseAnalysisTime) {
        this.maxParseAnalysisTime = maxParseAnalysisTime;
    }

    /**
     * Returns the sentence processor used to process sentences that have been read.
     * When none is set yet and the end module is the sentence detector, a Freemarker
     * template writer is created: from the file at <code>templatePath</code> when set,
     * otherwise from the built-in sentence template resource.
     * @return the sentence processor, or null if none is set and the end module is not the sentence detector
     * @throws RuntimeException wrapping any exception raised while creating the processor
     */
    public SentenceProcessor getSentenceProcessor() {
        try {
            if (sentenceProcessor != null || !endModule.equals(Module.SentenceDetector)) {
                return sentenceProcessor;
            }

            Reader templateReader;
            if (templatePath != null) {
                templateReader = new BufferedReader(new FileReader(new File(templatePath)));
            } else {
                templateReader = new BufferedReader(
                        new InputStreamReader(getInputStreamFromResource(sentenceTemplateName)));
            }
            sentenceProcessor = new FreemarkerTemplateWriter(templateReader);
            return sentenceProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the token sequence processor used to process token sequences that have been read.
     * When none is set yet and the end module is the tokeniser, a Freemarker template
     * writer is created: from the file at <code>templatePath</code> when set, otherwise
     * from the built-in tokeniser template resource.
     * @return the token sequence processor, or null if none is set and the end module is not the tokeniser
     * @throws RuntimeException wrapping any exception raised while creating the processor
     */
    public TokenSequenceProcessor getTokenSequenceProcessor() {
        try {
            if (tokenSequenceProcessor != null || !endModule.equals(Module.Tokeniser)) {
                return tokenSequenceProcessor;
            }

            Reader templateReader;
            if (templatePath != null) {
                templateReader = new BufferedReader(new FileReader(new File(templatePath)));
            } else {
                templateReader = new BufferedReader(
                        new InputStreamReader(getInputStreamFromResource(tokeniserTemplateName)));
            }
            tokenSequenceProcessor = new FreemarkerTemplateWriter(templateReader);
            return tokenSequenceProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the pos-tag sequence processor used to process pos-tag sequences that have been read.
     * When none is set yet and the end module is the pos-tagger, one is created: a pos-tag
     * feature tester writing to "&lt;baseName&gt;_featureTest.txt" in the output directory
     * when the option is <code>posTagFeatureTester</code>, otherwise a Freemarker template
     * writer (from the file at <code>templatePath</code> when set, else from the built-in
     * pos-tagger template resource).
     * @return the pos-tag sequence processor, or null if none is set and the end module is not the pos-tagger
     * @throws RuntimeException wrapping any exception raised while creating the processor
     */
    public PosTagSequenceProcessor getPosTagSequenceProcessor() {
        try {
            if (posTagSequenceProcessor != null || !endModule.equals(Module.PosTagger)) {
                return posTagSequenceProcessor;
            }

            if (this.option != Option.posTagFeatureTester) {
                Reader templateReader;
                if (templatePath != null) {
                    templateReader = new BufferedReader(new FileReader(new File(templatePath)));
                } else {
                    templateReader = new BufferedReader(
                            new InputStreamReader(getInputStreamFromResource(posTaggerTemplateName)));
                }
                posTagSequenceProcessor = new FreemarkerTemplateWriter(templateReader);
            } else {
                File featureTestFile = new File(this.getOutDir(), this.getBaseName() + "_featureTest.txt");
                posTagSequenceProcessor = this.getPosTaggerService()
                        .getPosTagFeatureTester(this.getPosTaggerFeatures(), this.testWords, featureTestFile);
            }
            return posTagSequenceProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the parse configuration processor used to process parse configurations that have been read.
     * When none is set yet and the end module is the parser, one is created according to the option:
     * no option gives a Freemarker template writer (from the file at <code>templatePath</code>
     * when set, else from the built-in parser template resource);
     * <code>loadParsingConstraints</code> gives a parsing constrainer writing to the file at
     * <code>outFilePath</code>; <code>parseFeatureTester</code> gives a parse feature tester
     * writing to "&lt;baseName&gt;_featureTest.txt" in the output directory.
     * @return the parse configuration processor, or null if none is set and the end module is not the parser
     * @throws RuntimeException wrapping any exception raised while creating the processor,
     * including the TalismaneException raised for an unknown option
     */
    public ParseConfigurationProcessor getParseConfigurationProcessor() {
        try {
            if (parseConfigurationProcessor != null || !endModule.equals(Module.Parser)) {
                return parseConfigurationProcessor;
            }

            if (option == null) {
                Reader templateReader;
                if (templatePath != null) {
                    templateReader = new BufferedReader(new FileReader(new File(templatePath)));
                } else {
                    templateReader = new BufferedReader(
                            new InputStreamReader(getInputStreamFromResource(parserTemplateName)));
                }
                parseConfigurationProcessor = new FreemarkerTemplateWriter(templateReader);
            } else if (option.equals(Option.loadParsingConstraints)) {
                ParsingConstrainer parsingConstrainer = this.getParserService().getParsingConstrainer();
                // result unused; presumably called for a side effect of the getter - TODO confirm
                this.getOutDir();
                parsingConstrainer.setFile(new File(outFilePath));
                parseConfigurationProcessor = parsingConstrainer;
            } else if (option.equals(Option.parseFeatureTester)) {
                File featureTestFile = new File(this.getOutDir(), this.getBaseName() + "_featureTest.txt");
                parseConfigurationProcessor = this.getParserService()
                        .getParseFeatureTester(this.getParserFeatures(), featureTestFile);
            } else {
                throw new TalismaneException("Unknown option: " + option.toString());
            }
            return parseConfigurationProcessor;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns a token corpus reader to read a corpus pre-annotated in tokens.
     * Note that in general, any filters up to and including the tokeniser should be applied to the corpus reader.
     * On the first call a regex-based reader is created over the input reader, using the
     * input regex when one is set, and a sentence reader over the file at
     * <code>sentenceReaderPath</code> when that path is set. On every call the corpus
     * reader attributes are set and the token corpus reader filters are requested.
     * @return the cached token corpus reader
     * @throws RuntimeException if the sentence reader file cannot be found
     */
    public TokeniserAnnotatedCorpusReader getTokenCorpusReader() {
        if (tokenCorpusReader == null) {
            TokenRegexBasedCorpusReader regexReader = this.getTokeniserService()
                    .getRegexBasedCorpusReader(this.getReader());
            if (this.getInputRegex() != null) {
                regexReader.setRegex(this.getInputRegex());
            }

            if (this.sentenceReaderPath != null) {
                try {
                    // this reader is handed over to the sentence reader
                    Reader sentenceFileReader = new BufferedReader(new InputStreamReader(
                            new FileInputStream(new File(sentenceReaderPath)), this.getInputCharset()));
                    SentenceDetectorAnnotatedCorpusReader sentenceCorpusReader = this
                            .getSentenceDetectorService().getDefaultReader(sentenceFileReader);
                    regexReader.setSentenceReader(sentenceCorpusReader);
                } catch (FileNotFoundException fnfe) {
                    LogUtils.logError(LOG, fnfe);
                    throw new RuntimeException(fnfe);
                }
            }
            this.tokenCorpusReader = regexReader;
        }

        this.setCorpusReaderAttributes(tokenCorpusReader);
        this.addTokenCorpusReaderFilters(tokenCorpusReader);
        return tokenCorpusReader;
    }

    /**
     * Returns the token corpus reader used for evaluation. On the first call a regex-based
     * reader is created over the evaluation reader, using the evaluation regex when one is
     * set. On every call the corpus reader attributes are set and the token corpus reader
     * filters are requested.
     * @return the cached token evaluation corpus reader
     */
    public TokeniserAnnotatedCorpusReader getTokenEvaluationCorpusReader() {
        if (tokenEvaluationCorpusReader == null) {
            TokenRegexBasedCorpusReader regexReader = this.getTokeniserService()
                    .getRegexBasedCorpusReader(this.getEvaluationReader());
            if (this.getEvaluationRegex() != null) {
                regexReader.setRegex(this.getEvaluationRegex());
            }
            this.tokenEvaluationCorpusReader = regexReader;
        }

        this.setCorpusReaderAttributes(tokenEvaluationCorpusReader);
        this.addTokenCorpusReaderFilters(tokenEvaluationCorpusReader);
        return tokenEvaluationCorpusReader;
    }

    /**
     * Adds token filters and token sequence filters to a token corpus reader, in this order:
     * the configured token filters; when the start module is the pos-tagger, the token
     * filters and then the token sequence filters described in the pos-tagger model;
     * finally the configured token sequence filters.
     * Does nothing once <code>tokenCorpusReaderFiltersAdded</code> has been set.
     * NOTE(review): the flag is a single field shared by every reader passed here, so only
     * the first reader processed receives filters - confirm this is intended.
     * @param corpusReader the reader to which the filters are added
     */
    void addTokenCorpusReaderFilters(TokeniserAnnotatedCorpusReader corpusReader) {
        if (tokenCorpusReaderFiltersAdded) {
            return;
        }

        for (TokenFilter configuredFilter : this.getTokenFilters()) {
            corpusReader.addTokenFilter(configuredFilter);
        }

        if (startModule.equals(Module.PosTagger)) {
            ClassificationModel<PosTag> model = this.getPosTaggerModel();

            // filters described inside the model; empty lines and "#" lines are skipped
            List<String> modelTokenFilterDescriptors = model.getDescriptors()
                    .get(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY);
            if (modelTokenFilterDescriptors != null) {
                for (String descriptor : modelTokenFilterDescriptors) {
                    if (descriptor.length() == 0 || descriptor.startsWith("#")) {
                        continue;
                    }
                    TokenFilter modelFilter = this.getTokenFilterService().getTokenFilter(descriptor);
                    corpusReader.addTokenFilter(modelFilter);
                }
            }

            List<String> modelSequenceFilterDescriptors = model.getDescriptors()
                    .get(TokenFilterService.TOKEN_SEQUENCE_FILTER_DESCRIPTOR_KEY);
            if (modelSequenceFilterDescriptors != null) {
                for (String descriptor : modelSequenceFilterDescriptors) {
                    if (descriptor.length() == 0 || descriptor.startsWith("#")) {
                        continue;
                    }
                    TokenSequenceFilter modelSequenceFilter = this.getTokenFilterService()
                            .getTokenSequenceFilter(descriptor);
                    corpusReader.addTokenSequenceFilter(modelSequenceFilter);
                }
            }
        }

        for (TokenSequenceFilter configuredSequenceFilter : this.getTokenSequenceFilters()) {
            corpusReader.addTokenSequenceFilter(configuredSequenceFilter);
        }
        this.tokenCorpusReaderFiltersAdded = true;
    }

    /**
     * Replaces the token corpus reader held by this configuration.
     * @param tokenCorpusReader the reader to store
     */
    public void setTokenCorpusReader(final TokeniserAnnotatedCorpusReader tokenCorpusReader) {
        this.tokenCorpusReader = tokenCorpusReader;
    }

    /**
     * Returns a pos-tag corpus reader to read a corpus pre-annotated in pos-tags.
     * Note that, in general, any filters up to and including the pos-tagger should be applied to the reader.
     * On the first call a regex-based reader is created over the input reader, using the
     * input regex when one is set. On every call the corpus reader attributes are set and
     * the pos-tag corpus reader filters are requested.
     * @return the cached pos-tag corpus reader
     */
    public PosTagAnnotatedCorpusReader getPosTagCorpusReader() {
        if (posTagCorpusReader == null) {
            PosTagRegexBasedCorpusReader regexReader = this.getPosTaggerService()
                    .getRegexBasedCorpusReader(this.getReader());
            if (this.getInputRegex() != null) {
                regexReader.setRegex(this.getInputRegex());
            }
            posTagCorpusReader = regexReader;
        }

        this.setCorpusReaderAttributes(posTagCorpusReader);
        this.addPosTagCorpusReaderFilters(posTagCorpusReader);
        return posTagCorpusReader;
    }

    /**
     * Returns the pos-tag corpus reader used for evaluation. On the first call a
     * regex-based reader is created over the evaluation reader, using the evaluation
     * regex when one is set. On every call the pos-tag corpus reader filters are requested.
     * NOTE(review): unlike the other corpus reader getters next to it, this one does not
     * call setCorpusReaderAttributes - confirm whether that is intended.
     * @return the cached pos-tag evaluation corpus reader
     */
    public PosTagAnnotatedCorpusReader getPosTagEvaluationCorpusReader() {
        if (posTagEvaluationCorpusReader == null) {
            PosTagRegexBasedCorpusReader regexReader = this.getPosTaggerService()
                    .getRegexBasedCorpusReader(this.getEvaluationReader());
            if (this.getEvaluationRegex() != null) {
                regexReader.setRegex(this.getEvaluationRegex());
            }
            this.posTagEvaluationCorpusReader = regexReader;
        }

        this.addPosTagCorpusReaderFilters(posTagEvaluationCorpusReader);
        return posTagEvaluationCorpusReader;
    }

    /**
     * Adds token filters, token sequence filters and pos-tag sequence filters to a pos-tag corpus reader.
     * <p>
     * The work is guarded by the {@code posTagCorpusReaderFiltersAdded} flag: once it is set, later calls
     * return without touching the reader passed in.
     * NOTE(review): both {@code getPosTagCorpusReader()} and {@code getPosTagEvaluationCorpusReader()} call
     * this method, so unless the flag is reset elsewhere only the first reader passed in receives filters.
     * Confirm this is intended.
     * <p>
     * Unless the command is {@code train}, filter descriptors are read from the model descriptors. The
     * models consulted depend on the start module:
     * <ul>
     * <li>Tokeniser: tokeniser model and pos-tagger model</li>
     * <li>PosTagger: pos-tagger model for both roles</li>
     * <li>otherwise: parser model for both roles</li>
     * </ul>
     * Empty descriptors and descriptors starting with {@code #} are ignored.
     * Afterwards the filters configured on this object are added: each token sequence filter exactly once,
     * then the default pos-tag sequence filters.
     * @param corpusReader the reader to which the filters are added
     */
    void addPosTagCorpusReaderFilters(PosTagAnnotatedCorpusReader corpusReader) {
        if (posTagCorpusReaderFiltersAdded) {
            return;
        }

        if (this.getCommand() != Command.train) {
            // Pick the models whose descriptors hold the tokeniser-level and pos-tagger-level filters.
            MachineLearningModel myTokeniserModel;
            MachineLearningModel myPosTaggerModel;
            if (this.getStartModule().equals(Module.Tokeniser)) {
                myTokeniserModel = this.getTokeniserModel();
                myPosTaggerModel = this.getPosTaggerModel();
            } else if (this.getStartModule().equals(Module.PosTagger)) {
                myTokeniserModel = this.getPosTaggerModel();
                myPosTaggerModel = this.getPosTaggerModel();
            } else {
                myTokeniserModel = this.getParserModel();
                myPosTaggerModel = this.getParserModel();
            }

            List<String> tokenFilterDescriptors = myTokeniserModel.getDescriptors()
                    .get(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY);
            if (tokenFilterDescriptors != null) {
                List<TokenFilter> tokenFilters = new ArrayList<TokenFilter>();
                for (String descriptor : tokenFilterDescriptors) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        TokenFilter tokenFilter = this.getTokenFilterService().getTokenFilter(descriptor);
                        tokenFilters.add(tokenFilter);
                    }
                }

                // The configured token filters are appended after the model's own filters.
                for (TokenFilter tokenFilter : this.getTokenFilters()) {
                    tokenFilters.add(tokenFilter);
                }

                // All token filters are wrapped into a single token sequence filter.
                TokenSequenceFilter tokenFilterWrapper = this.getTokenFilterService()
                        .getTokenSequenceFilter(tokenFilters);
                corpusReader.addTokenSequenceFilter(tokenFilterWrapper);
            }

            List<String> tokenSequenceFilterDescriptors = myTokeniserModel.getDescriptors()
                    .get(TokenFilterService.TOKEN_SEQUENCE_FILTER_DESCRIPTOR_KEY);
            if (tokenSequenceFilterDescriptors != null) {
                for (String descriptor : tokenSequenceFilterDescriptors) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        TokenSequenceFilter tokenSequenceFilter = this.getTokenFilterService()
                                .getTokenSequenceFilter(descriptor);
                        corpusReader.addTokenSequenceFilter(tokenSequenceFilter);
                    }
                }
            }

            List<String> posTaggerPreprocessingFilters = myPosTaggerModel.getDescriptors()
                    .get(PosTagFilterService.POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY);
            if (posTaggerPreprocessingFilters != null) {
                for (String descriptor : posTaggerPreprocessingFilters) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        TokenSequenceFilter tokenSequenceFilter = this.getTokenFilterService()
                                .getTokenSequenceFilter(descriptor);
                        corpusReader.addTokenSequenceFilter(tokenSequenceFilter);
                    }
                }
            }

            List<String> posTaggerPostProcessingFilters = myPosTaggerModel.getDescriptors()
                    .get(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY);
            if (posTaggerPostProcessingFilters != null) {
                for (String descriptor : posTaggerPostProcessingFilters) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        PosTagSequenceFilter posTagSequenceFilter = this.getPosTagFilterService()
                                .getPosTagSequenceFilter(descriptor);
                        corpusReader.addPosTagSequenceFilter(posTagSequenceFilter);
                    }
                }
            }
        } // do the models exist already?

        // Each configured token sequence filter is registered once; this loop used to be
        // duplicated, which registered every filter twice on the reader.
        for (TokenSequenceFilter tokenFilter : this.getTokenSequenceFilters()) {
            corpusReader.addTokenSequenceFilter(tokenFilter);
        }

        for (PosTagSequenceFilter posTagSequenceFilter : this.getDefaultPosTagSequenceFilters()) {
            corpusReader.addPosTagSequenceFilter(posTagSequenceFilter);
        }

        posTagCorpusReaderFiltersAdded = true;
    }

    /**
     * A parser corpus reader to read a corpus pre-annotated in dependencies.
     * When no reader has been set yet, a regex-based reader is built over {@code getReader()},
     * configured with the input regex (when non-null), the {@code predictTransitions} setting and
     * the exclude file name (when non-null).
     * The corpus reader attributes and filters are applied only while {@code parserCorpusReaderDecorated}
     * is false; the flag is then set.
     * @return the parser corpus reader
     */
    public ParserAnnotatedCorpusReader getParserCorpusReader() {
        if (this.parserCorpusReader == null) {
            ParserService service = this.getParserService();
            ParserRegexBasedCorpusReader regexReader = service.getRegexBasedCorpusReader(this.getReader());
            if (this.getInputRegex() != null) {
                regexReader.setRegex(this.getInputRegex());
            }
            regexReader.setPredictTransitions(predictTransitions);
            if (this.excludeFileName != null) {
                regexReader.setExcludeFileName(this.excludeFileName);
            }
            this.parserCorpusReader = regexReader;
        }

        if (parserCorpusReaderDecorated) {
            return this.parserCorpusReader;
        }

        this.setCorpusReaderAttributes(this.parserCorpusReader);
        this.addParserCorpusReaderFilters(this.parserCorpusReader);
        parserCorpusReaderDecorated = true;
        return this.parserCorpusReader;
    }

    /**
     * Copies the sentence-count and cross-validation settings of this configuration onto a corpus reader.
     * The maximum sentence count is always set. The cross-validation size is set only when it is
     * strictly positive, and the include/exclude indexes only when they are non-negative.
     * @param corpusReader the reader to configure
     */
    void setCorpusReaderAttributes(AnnotatedCorpusReader corpusReader) {
        corpusReader.setMaxSentenceCount(maxSentenceCount);

        boolean hasCrossValidation = crossValidationSize > 0;
        if (hasCrossValidation) {
            corpusReader.setCrossValidationSize(crossValidationSize);
        }

        boolean hasIncludeIndex = includeIndex >= 0;
        if (hasIncludeIndex) {
            corpusReader.setIncludeIndex(includeIndex);
        }

        boolean hasExcludeIndex = excludeIndex >= 0;
        if (hasExcludeIndex) {
            corpusReader.setExcludeIndex(excludeIndex);
        }
    }

    /**
     * Returns the parser corpus reader used for evaluation.
     * When no reader has been set yet, a regex-based reader is built over {@code getEvaluationReader()},
     * configured with the evaluation regex (when non-null) and the {@code predictTransitions} setting.
     * {@code addParserCorpusReaderFilters} is invoked on the reader on every call.
     * @return the parser evaluation corpus reader
     */
    public ParserAnnotatedCorpusReader getParserEvaluationCorpusReader() {
        if (this.parserEvaluationCorpusReader == null) {
            ParserService service = this.getParserService();
            ParserRegexBasedCorpusReader regexReader = service
                    .getRegexBasedCorpusReader(this.getEvaluationReader());
            if (this.getEvaluationRegex() != null) {
                regexReader.setRegex(this.getEvaluationRegex());
            }
            regexReader.setPredictTransitions(predictTransitions);
            this.parserEvaluationCorpusReader = regexReader;
        }

        this.addParserCorpusReaderFilters(this.parserEvaluationCorpusReader);
        return this.parserEvaluationCorpusReader;
    }

    /**
     * Returns the evaluation file path stored in this configuration.
     * @return the value of the {@code evaluationFilePath} field, as is
     */
    public String getEvaluationFilePath() {
        return evaluationFilePath;
    }

    /**
     * Replaces the parser evaluation corpus reader held by this configuration.
     * This method only stores the reference; it does not add any filters to the reader itself.
     * @param parserEvaluationCorpusReader the reader to store
     */
    public void setParserEvaluationCorpusReader(ParserAnnotatedCorpusReader parserEvaluationCorpusReader) {
        this.parserEvaluationCorpusReader = parserEvaluationCorpusReader;
    }

    /**
     * Replaces the pos-tag evaluation corpus reader held by this configuration.
     * This method only stores the reference; it does not add any filters to the reader itself.
     * @param posTagEvaluationCorpusReader the reader to store
     */
    public void setPosTagEvaluationCorpusReader(PosTagAnnotatedCorpusReader posTagEvaluationCorpusReader) {
        this.posTagEvaluationCorpusReader = posTagEvaluationCorpusReader;
    }

    /**
     * Adds token filters, token sequence filters and pos-tag sequence filters to a parser corpus reader.
     * <p>
     * The work is guarded by the {@code parserCorpusReaderFiltersAdded} flag: once it is set, later calls
     * return without touching the reader passed in.
     * NOTE(review): both {@code getParserCorpusReader()} and {@code getParserEvaluationCorpusReader()} call
     * this method, so unless the flag is reset elsewhere only the first reader passed in receives filters.
     * Confirm this is intended.
     * <p>
     * Unless the command is {@code train}, filter descriptors are read from the model descriptors. The
     * models used for the tokeniser and pos-tagger roles depend on the start module:
     * <ul>
     * <li>Tokeniser: tokeniser model and pos-tagger model</li>
     * <li>PosTagger: pos-tagger model for both roles</li>
     * <li>otherwise: parser model for both roles</li>
     * </ul>
     * The pos-tag post-processing descriptors are always read from the parser model.
     * Empty descriptors and descriptors starting with {@code #} are ignored.
     * Afterwards the filters configured on this object are added: each token sequence filter exactly once,
     * then the default pos-tag sequence filters.
     * @param corpusReader the reader to which the filters are added
     */
    void addParserCorpusReaderFilters(ParserAnnotatedCorpusReader corpusReader) {
        if (parserCorpusReaderFiltersAdded) {
            return;
        }

        if (this.getCommand() != Command.train) {
            // Pick the models whose descriptors hold the filters for each processing level.
            MachineLearningModel myTokeniserModel;
            MachineLearningModel myPosTaggerModel;
            MachineLearningModel myParserModel;
            if (this.getStartModule().equals(Module.Tokeniser)) {
                myTokeniserModel = this.getTokeniserModel();
                myPosTaggerModel = this.getPosTaggerModel();
                myParserModel = this.getParserModel();
            } else if (this.getStartModule().equals(Module.PosTagger)) {
                myTokeniserModel = this.getPosTaggerModel();
                myPosTaggerModel = this.getPosTaggerModel();
                myParserModel = this.getParserModel();
            } else {
                myTokeniserModel = this.getParserModel();
                myPosTaggerModel = this.getParserModel();
                myParserModel = this.getParserModel();
            }

            List<String> tokenFilterDescriptors = myTokeniserModel.getDescriptors()
                    .get(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY);
            if (tokenFilterDescriptors != null) {
                List<TokenFilter> tokenFilters = new ArrayList<TokenFilter>();
                for (String descriptor : tokenFilterDescriptors) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        TokenFilter tokenFilter = this.getTokenFilterService().getTokenFilter(descriptor);
                        tokenFilters.add(tokenFilter);
                    }
                }

                // The configured token filters are appended after the model's own filters.
                for (TokenFilter tokenFilter : this.getTokenFilters()) {
                    tokenFilters.add(tokenFilter);
                }

                // All token filters are wrapped into a single token sequence filter.
                TokenSequenceFilter tokenFilterWrapper = this.getTokenFilterService()
                        .getTokenSequenceFilter(tokenFilters);
                corpusReader.addTokenSequenceFilter(tokenFilterWrapper);
            }

            List<String> tokenSequenceFilterDescriptors = myTokeniserModel.getDescriptors()
                    .get(TokenFilterService.TOKEN_SEQUENCE_FILTER_DESCRIPTOR_KEY);
            if (tokenSequenceFilterDescriptors != null) {
                for (String descriptor : tokenSequenceFilterDescriptors) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        TokenSequenceFilter tokenSequenceFilter = this.getTokenFilterService()
                                .getTokenSequenceFilter(descriptor);
                        corpusReader.addTokenSequenceFilter(tokenSequenceFilter);
                    }
                }
            }

            List<String> posTaggerPreprocessingFilters = myPosTaggerModel.getDescriptors()
                    .get(PosTagFilterService.POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY);
            if (posTaggerPreprocessingFilters != null) {
                for (String descriptor : posTaggerPreprocessingFilters) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        TokenSequenceFilter tokenSequenceFilter = this.getTokenFilterService()
                                .getTokenSequenceFilter(descriptor);
                        corpusReader.addTokenSequenceFilter(tokenSequenceFilter);
                    }
                }
            }

            // Post-processing descriptors come from the parser model here, not the pos-tagger model.
            List<String> posTaggerPostProcessingFilters = myParserModel.getDescriptors()
                    .get(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY);
            if (posTaggerPostProcessingFilters != null) {
                for (String descriptor : posTaggerPostProcessingFilters) {
                    if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                        PosTagSequenceFilter posTagSequenceFilter = this.getPosTagFilterService()
                                .getPosTagSequenceFilter(descriptor);
                        corpusReader.addPosTagSequenceFilter(posTagSequenceFilter);
                    }
                }
            }
        } // models exist already?

        // Each configured token sequence filter is registered once; this loop used to be
        // duplicated, which registered every filter twice on the reader.
        for (TokenSequenceFilter tokenFilter : this.getTokenSequenceFilters()) {
            corpusReader.addTokenSequenceFilter(tokenFilter);
        }

        for (PosTagSequenceFilter posTagSequenceFilter : this.getDefaultPosTagSequenceFilters()) {
            corpusReader.addPosTagSequenceFilter(posTagSequenceFilter);
        }

        parserCorpusReaderFiltersAdded = true;
    }

    /**
     * Replaces the pos-tag corpus reader held by this configuration.
     * This method only stores the reference; it does not add any filters to the reader itself.
     * @param posTagCorpusReader the reader to store
     */
    public void setPosTagCorpusReader(PosTagAnnotatedCorpusReader posTagCorpusReader) {
        this.posTagCorpusReader = posTagCorpusReader;
    }

    /**
     * Replaces the parser corpus reader held by this configuration.
     * This method only stores the reference; it does not add any filters to the reader itself.
     * @param parserCorpusReader the reader to store
     */
    public void setParserCorpusReader(ParserAnnotatedCorpusReader parserCorpusReader) {
        this.parserCorpusReader = parserCorpusReader;
    }

    /**
     * Get a parser evaluator if command=evaluate and endModule=parser.
     * <p>
     * The evaluator is built on first call and cached. Building it attaches the following observers, each
     * writing into the output directory under a name derived from {@code getBaseName()}:
     * <ul>
     * <li>an f-score calculator ({@code .fscores.csv})</li>
     * <li>if {@code outputGuesses} is set, a sentence writer ({@code _sentences.csv})</li>
     * <li>if {@code includeDistanceFScores} is set, an f-score-by-distance calculator
     * ({@code .distances.csv})</li>
     * <li>a template-based guess writer ({@code _output.txt})</li>
     * </ul>
     * Existing CSV and output files are deleted and recreated.
     * @return the parser evaluator
     * @throws RuntimeException wrapping any exception raised while building the evaluator
     */
    public ParserEvaluator getParserEvaluator() {
        try {
            if (parserEvaluator != null) {
                return parserEvaluator;
            }

            parserEvaluator = this.getParserService().getParserEvaluator();

            // Earlier modules are only plugged in when the analysis starts from them.
            boolean startsAtTokeniser = startModule.equals(Module.Tokeniser);
            if (startsAtTokeniser) {
                parserEvaluator.setTokeniser(this.getTokeniser());
            }
            if (startsAtTokeniser || startModule.equals(Module.PosTagger)) {
                parserEvaluator.setPosTagger(this.getPosTagger());
            }
            parserEvaluator.setParser(this.getParser());

            File fScoreOutput = new File(this.getOutDir(), this.getBaseName() + ".fscores.csv");
            ParseEvaluationFScoreCalculator fScoreObserver = new ParseEvaluationFScoreCalculator(fScoreOutput);
            fScoreObserver.setLabeledEvaluation(this.labeledEvaluation);
            fScoreObserver.setSkipLabel(skipLabel);
            if (parserEvaluator.getTokeniser() != null) {
                fScoreObserver.setHasTokeniser(true);
            }
            if (parserEvaluator.getPosTagger() != null) {
                fScoreObserver.setHasPosTagger(true);
            }
            parserEvaluator.addObserver(fScoreObserver);

            if (outputGuesses) {
                File sentenceFile = new File(this.getOutDir(), this.getBaseName() + "_sentences.csv");
                sentenceFile.delete();
                sentenceFile.createNewFile();
                Writer sentenceFileWriter = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(sentenceFile, false), "UTF8"));

                // Fall back on the parser's beam width when no explicit guess count was configured.
                int guessCount = outputGuessCount > 0 ? outputGuessCount : this.getParser().getBeamWidth();

                ParseEvaluationSentenceWriter sentenceObserver = new ParseEvaluationSentenceWriter(
                        sentenceFileWriter, guessCount);
                if (parserEvaluator.getTokeniser() != null) {
                    sentenceObserver.setHasTokeniser(true);
                }
                if (parserEvaluator.getPosTagger() != null) {
                    sentenceObserver.setHasPosTagger(true);
                }
                parserEvaluator.addObserver(sentenceObserver);
            }

            if (includeDistanceFScores) {
                File distanceFile = new File(this.getOutDir(), this.getBaseName() + ".distances.csv");
                distanceFile.delete();
                distanceFile.createNewFile();
                Writer distanceFileWriter = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(distanceFile, false), "UTF8"));
                ParserFScoreCalculatorByDistance distanceObserver = new ParserFScoreCalculatorByDistance(
                        distanceFileWriter);
                distanceObserver.setLabeledEvaluation(this.labeledEvaluation);
                distanceObserver.setSkipLabel(skipLabel);
                parserEvaluator.addObserver(distanceObserver);
            }

            // The template comes from the bundled resource unless an explicit path was given.
            Reader templateSource = templatePath == null
                    ? new BufferedReader(new InputStreamReader(getInputStreamFromResource(parserTemplateName)))
                    : new BufferedReader(new FileReader(new File(templatePath)));

            File templateOutput = new File(this.getOutDir(), this.getBaseName() + "_output.txt");
            templateOutput.delete();
            templateOutput.createNewFile();
            Writer templateOutputWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(templateOutput, false), "UTF8"));
            parserEvaluator
                    .addObserver(new ParseEvaluationGuessTemplateWriter(templateOutputWriter, templateSource));

            parserEvaluator.setSentenceCount(maxSentenceCount);
            parserEvaluator.setPropagateBeam(propagateBeam);
            return parserEvaluator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a parser comparator if command=compare and endModule=parser.
     * <p>
     * The comparator is built on first call and cached. An f-score calculator writing to
     * {@code .fscores.csv} is always attached; when {@code includeDistanceFScores} is set, an
     * f-score-by-distance calculator writing to {@code .distances.csv} is attached as well.
     * @return the parse comparator
     * @throws RuntimeException wrapping any exception raised while building the comparator
     */
    public ParseComparator getParseComparator() {
        try {
            if (parseComparator != null) {
                return parseComparator;
            }

            parseComparator = this.getParserService().getParseComparator();

            File fScoreOutput = new File(this.getOutDir(), this.getBaseName() + ".fscores.csv");
            ParseEvaluationFScoreCalculator fScoreObserver = new ParseEvaluationFScoreCalculator(fScoreOutput);
            fScoreObserver.setLabeledEvaluation(this.labeledEvaluation);
            fScoreObserver.setSkipLabel(skipLabel);
            parseComparator.addObserver(fScoreObserver);

            if (includeDistanceFScores) {
                File distanceFile = new File(this.getOutDir(), this.getBaseName() + ".distances.csv");
                distanceFile.delete();
                distanceFile.createNewFile();
                Writer distanceFileWriter = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(distanceFile, false), "UTF8"));
                ParserFScoreCalculatorByDistance distanceObserver = new ParserFScoreCalculatorByDistance(
                        distanceFileWriter);
                distanceObserver.setLabeledEvaluation(this.labeledEvaluation);
                distanceObserver.setSkipLabel(skipLabel);
                parseComparator.addObserver(distanceObserver);
            }

            parseComparator.setSentenceCount(maxSentenceCount);
            return parseComparator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a tokeniser evaluator if command=evaluate and endModule=tokeniser.
     * <p>
     * The evaluator is built on first call and cached: it wraps {@code getTokeniser()}, receives every
     * observer returned by {@code getTokenEvaluationObservers()}, and is limited to
     * {@code maxSentenceCount} sentences.
     * @return the tokeniser evaluator
     */
    public TokeniserEvaluator getTokeniserEvaluator() {
        if (tokeniserEvaluator != null) {
            return tokeniserEvaluator;
        }

        tokeniserEvaluator = this.getTokeniserService().getTokeniserEvaluator(this.getTokeniser());
        for (TokenEvaluationObserver observer : this.getTokenEvaluationObservers()) {
            tokeniserEvaluator.addObserver(observer);
        }
        tokeniserEvaluator.setSentenceCount(maxSentenceCount);
        return tokeniserEvaluator;
    }

    /**
     * Builds the observers attached to token evaluators and comparators.
     * <p>
     * Three observers are returned, in this order: an f-score calculator (writing
     * {@code .errorList.txt}, {@code .errors.csv} and {@code .fscores.csv}), a corpus writer
     * ({@code .corpus.txt}) and a template-based guess writer ({@code _output.txt}). All files live in
     * the output directory under a name derived from {@code getBaseName()}; files opened here are
     * deleted and recreated first.
     * @return a new list of observers
     * @throws RuntimeException wrapping any {@link IOException} raised while preparing the files
     */
    private List<TokenEvaluationObserver> getTokenEvaluationObservers() {
        try {
            List<TokenEvaluationObserver> result = new ArrayList<TokenEvaluationObserver>();

            File errorListFile = new File(this.getOutDir(), this.getBaseName() + ".errorList.txt");
            errorListFile.delete();
            errorListFile.createNewFile();
            Writer errorListWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(errorListFile, false), "UTF8"));

            File errorCsvFile = new File(this.getOutDir(), this.getBaseName() + ".errors.csv");
            errorCsvFile.delete();
            errorCsvFile.createNewFile();
            Writer errorCsvWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(errorCsvFile, false), "UTF8"));

            File fScoreOutput = new File(this.getOutDir(), this.getBaseName() + ".fscores.csv");

            TokenEvaluationFScoreCalculator fScoreObserver = new TokenEvaluationFScoreCalculator();
            fScoreObserver.setErrorWriter(errorListWriter);
            fScoreObserver.setCsvErrorWriter(errorCsvWriter);
            fScoreObserver.setFScoreFile(fScoreOutput);
            result.add(fScoreObserver);

            File corpusOutput = new File(this.getOutDir(), this.getBaseName() + ".corpus.txt");
            corpusOutput.delete();
            corpusOutput.createNewFile();
            Writer corpusOutputWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(corpusOutput, false), "UTF8"));
            result.add(new TokenEvaluationCorpusWriter(corpusOutputWriter));

            // The template comes from the bundled resource unless an explicit path was given.
            Reader templateSource = templatePath == null
                    ? new BufferedReader(new InputStreamReader(getInputStreamFromResource(tokeniserTemplateName)))
                    : new BufferedReader(new FileReader(new File(templatePath)));

            File templateOutput = new File(this.getOutDir(), this.getBaseName() + "_output.txt");
            templateOutput.delete();
            templateOutput.createNewFile();
            Writer templateOutputWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(templateOutput, false), "UTF8"));
            result.add(new TokeniserGuessTemplateWriter(templateOutputWriter, templateSource));

            return result;
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a token comparator, which compares the token corpus reader with the token evaluation
     * corpus reader.
     * <p>
     * The comparator is built on first call and cached. When the configured tokeniser is a
     * {@code PatternTokeniser}, its pattern manager is handed to the comparator; otherwise
     * {@code null} is passed. The comparator receives every observer returned by
     * {@code getTokenEvaluationObservers()} and is limited to {@code maxSentenceCount} sentences.
     * @return the token comparator
     * @throws RuntimeException wrapping any exception raised while building the comparator
     */
    public TokenComparator getTokenComparator() {
        try {
            if (tokenComparator != null) {
                return tokenComparator;
            }

            Tokeniser currentTokeniser = this.getTokeniser();
            TokeniserPatternManager patternManager = currentTokeniser instanceof PatternTokeniser
                    ? ((PatternTokeniser) currentTokeniser).getTokeniserPatternManager()
                    : null;

            tokenComparator = this.getTokeniserService().getTokenComparator(this.getTokenCorpusReader(),
                    this.getTokenEvaluationCorpusReader(), patternManager);

            for (TokenEvaluationObserver observer : this.getTokenEvaluationObservers()) {
                tokenComparator.addObserver(observer);
            }

            tokenComparator.setSentenceCount(maxSentenceCount);
            return tokenComparator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a pos-tagger evaluator if command=evaluate and endModule=posTagger.
     * <p>
     * The evaluator is built on first call and cached. Building it attaches the following observers, each
     * writing into the output directory under a name derived from {@code getBaseName()}:
     * <ul>
     * <li>if {@code outputGuesses} is set, a sentence writer ({@code _sentences.csv})</li>
     * <li>an f-score calculator ({@code .fscores.csv})</li>
     * <li>a template-based guess writer ({@code _output.txt})</li>
     * <li>if {@code includeLexiconCoverage} is set, a lexical coverage tester ({@code .unknown.csv})</li>
     * </ul>
     * @return the pos-tagger evaluator
     * @throws RuntimeException wrapping any exception raised while building the evaluator
     */
    public PosTaggerEvaluator getPosTaggerEvaluator() {
        try {
            if (posTaggerEvaluator != null) {
                return posTaggerEvaluator;
            }

            posTaggerEvaluator = this.getPosTaggerService().getPosTaggerEvaluator(this.getPosTagger());

            // The tokeniser is only plugged in when the analysis starts from it.
            if (startModule.equals(Module.Tokeniser)) {
                posTaggerEvaluator.setTokeniser(this.getTokeniser());
            }

            if (outputGuesses) {
                File sentenceFile = new File(this.getOutDir(), this.getBaseName() + "_sentences.csv");
                sentenceFile.delete();
                sentenceFile.createNewFile();
                Writer sentenceFileWriter = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(sentenceFile, false), "UTF8"));

                // Explicit guess count first, then the beam width if the tagger has one, else a single guess.
                int guessCount;
                if (outputGuessCount > 0) {
                    guessCount = outputGuessCount;
                } else if (this.getPosTagger() instanceof NonDeterministicPosTagger) {
                    guessCount = ((NonDeterministicPosTagger) this.getPosTagger()).getBeamWidth();
                } else {
                    guessCount = 1;
                }

                posTaggerEvaluator.addObserver(new PosTagEvaluationSentenceWriter(sentenceFileWriter, guessCount));
            }

            File fScoreOutput = new File(this.getOutDir(), this.getBaseName() + ".fscores.csv");
            posTaggerEvaluator.addObserver(new PosTagEvaluationFScoreCalculator(fScoreOutput));

            // The template comes from the bundled resource unless an explicit path was given.
            Reader templateSource = templatePath == null
                    ? new BufferedReader(new InputStreamReader(getInputStreamFromResource(posTaggerTemplateName)))
                    : new BufferedReader(new FileReader(new File(templatePath)));

            File templateOutput = new File(this.getOutDir(), this.getBaseName() + "_output.txt");
            templateOutput.delete();
            templateOutput.createNewFile();
            Writer templateOutputWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(templateOutput, false), "UTF8"));
            posTaggerEvaluator.addObserver(new PosTaggerGuessTemplateWriter(templateOutputWriter, templateSource));

            if (includeLexiconCoverage) {
                File coverageOutput = new File(this.getOutDir(), this.getBaseName() + ".unknown.csv");
                posTaggerEvaluator.addObserver(new PosTagEvaluationLexicalCoverageTester(coverageOutput));
            }

            posTaggerEvaluator.setPropagateBeam(propagateBeam);
            posTaggerEvaluator.setSentenceCount(maxSentenceCount);
            return posTaggerEvaluator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Get a pos-tag comparator.
     * <p>
     * The comparator is built on first call and cached. An f-score calculator writing to
     * {@code .fscores.csv} (in the output directory, named after {@code getBaseName()}) is attached,
     * and the comparator is limited to {@code maxSentenceCount} sentences.
     * @return the pos-tag comparator
     * @throws RuntimeException wrapping any exception raised while building the comparator
     */
    public PosTagComparator getPosTagComparator() {
        try {
            if (posTagComparator != null) {
                return posTagComparator;
            }

            posTagComparator = this.getPosTaggerService().getPosTagComparator();

            File fScoreOutput = new File(this.getOutDir(), this.getBaseName() + ".fscores.csv");
            posTagComparator.addObserver(new PosTagEvaluationFScoreCalculator(fScoreOutput));

            posTagComparator.setSentenceCount(maxSentenceCount);
            return posTagComparator;
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * The base name, out of which to construct output file names.
     * <p>
     * Computed once and cached. The name is derived from the first applicable source in this order:
     * the output file path, the input file path, then the model file path of the sentence detector,
     * tokeniser, pos-tagger or parser. A model path is used only when it is non-null and the
     * corresponding module is either the current module or the end module. When no source applies,
     * {@code "Talismane"} is used. The configured suffix is appended in all cases.
     * @return the base name, including the suffix
     */
    public String getBaseName() {
        if (baseName == null) {
            String name = "Talismane";
            if (outFilePath != null) {
                name = baseNameFromPath(outFilePath);
            } else if (inFilePath != null) {
                name = baseNameFromPath(inFilePath);
            } else if (sentenceModelFilePath != null
                    && isModuleOrEndModule(Talismane.Module.SentenceDetector)) {
                // The null check must guard both module tests: without the grouping, an end module of
                // SentenceDetector with no model path dereferenced a null path.
                name = baseNameFromPath(sentenceModelFilePath);
            } else if (tokeniserModelFilePath != null && isModuleOrEndModule(Talismane.Module.Tokeniser)) {
                name = baseNameFromPath(tokeniserModelFilePath);
            } else if (posTaggerModelFilePath != null && isModuleOrEndModule(Talismane.Module.PosTagger)) {
                name = baseNameFromPath(posTaggerModelFilePath);
            } else if (parserModelFilePath != null && isModuleOrEndModule(Talismane.Module.Parser)) {
                name = baseNameFromPath(parserModelFilePath);
            }
            baseName = name + suffix;
        }
        return baseName;
    }

    /**
     * Tells whether the given module is the current module or the end module.
     * Written constant-first so that an unset module or end module simply does not match.
     */
    private boolean isModuleOrEndModule(Talismane.Module candidate) {
        return candidate.equals(module) || candidate.equals(endModule);
    }

    /**
     * Strips the directory part (up to the last {@code '/'}) and the extension (from the last
     * {@code '.'}) off a path.
     * The extension is stripped only when the path's first dot is not at index 0 and the last dot lies
     * in the file name rather than in a directory component. The second condition prevents an
     * out-of-range substring for paths such as {@code /dir.v1/file}.
     */
    private static String baseNameFromPath(String path) {
        int nameStart = path.lastIndexOf('/') + 1;
        int lastDot = path.lastIndexOf('.');
        if (path.indexOf('.') > 0 && lastDot >= nameStart) {
            return path.substring(nameStart, lastDot);
        }
        return path.substring(nameStart);
    }

    /**
     * Returns the pos-tagger service, obtaining it through the service locator on first use
     * when none has been set.
     * @return the pos-tagger service
     */
    public PosTaggerService getPosTaggerService() {
        if (posTaggerService != null) {
            return posTaggerService;
        }
        posTaggerService = talismaneServiceLocator.getPosTaggerServiceLocator().getPosTaggerService();
        return posTaggerService;
    }

    /**
     * Sets the pos-tagger service returned by {@link #getPosTaggerService()}.
     *
     * @param posTaggerService the service to use; if null, the getter falls
     *        back to the service locator on its next call
     */
    public void setPosTaggerService(PosTaggerService posTaggerService) {
        this.posTaggerService = posTaggerService;
    }

    /**
     * Returns the parser service. If none has been set yet, it is obtained
     * from the Talismane service locator and cached for subsequent calls.
     */
    public ParserService getParserService() {
        if (this.parserService != null) {
            return this.parserService;
        }
        this.parserService = talismaneServiceLocator.getParserServiceLocator().getParserService();
        return this.parserService;
    }

    /**
     * Sets the parser service returned by {@link #getParserService()}.
     *
     * @param parserService the service to use; if null, the getter falls
     *        back to the service locator on its next call
     */
    public void setParserService(ParserService parserService) {
        this.parserService = parserService;
    }

    /**
     * Returns the pos-tagger feature service. If none has been set yet, it is
     * obtained from the Talismane service locator and cached for subsequent calls.
     */
    public PosTaggerFeatureService getPosTaggerFeatureService() {
        if (this.posTaggerFeatureService != null) {
            return this.posTaggerFeatureService;
        }
        this.posTaggerFeatureService = talismaneServiceLocator
                .getPosTaggerFeatureServiceLocator()
                .getPosTaggerFeatureService();
        return this.posTaggerFeatureService;
    }

    /**
     * Sets the pos-tagger feature service returned by
     * {@link #getPosTaggerFeatureService()}.
     *
     * @param posTaggerFeatureService the service to use; if null, the getter
     *        falls back to the service locator on its next call
     */
    public void setPosTaggerFeatureService(PosTaggerFeatureService posTaggerFeatureService) {
        this.posTaggerFeatureService = posTaggerFeatureService;
    }

    /**
     * Returns the parser feature service. If none has been set yet, it is
     * obtained from the Talismane service locator and cached for subsequent calls.
     */
    public ParserFeatureService getParserFeatureService() {
        if (this.parserFeatureService != null) {
            return this.parserFeatureService;
        }
        this.parserFeatureService = talismaneServiceLocator
                .getParserFeatureServiceLocator()
                .getParserFeatureService();
        return this.parserFeatureService;
    }

    /**
     * Sets the parser feature service returned by
     * {@link #getParserFeatureService()}.
     *
     * @param parserFeatureService the service to use; if null, the getter
     *        falls back to the service locator on its next call
     */
    public void setParserFeatureService(ParserFeatureService parserFeatureService) {
        this.parserFeatureService = parserFeatureService;
    }

    /**
     * Returns the filter service. If none has been set yet, it is obtained
     * from the Talismane service locator and cached for subsequent calls.
     */
    public FilterService getFilterService() {
        if (this.filterService != null) {
            return this.filterService;
        }
        this.filterService = talismaneServiceLocator.getFilterServiceLocator().getFilterService();
        return this.filterService;
    }

    /**
     * Sets the filter service returned by {@link #getFilterService()}.
     *
     * @param filterService the service to use; if null, the getter falls
     *        back to the service locator on its next call
     */
    public void setFilterService(FilterService filterService) {
        this.filterService = filterService;
    }

    /**
     * Returns the token filter service. If none has been set yet, it is
     * obtained from the Talismane service locator and cached for subsequent calls.
     */
    public TokenFilterService getTokenFilterService() {
        if (this.tokenFilterService != null) {
            return this.tokenFilterService;
        }
        this.tokenFilterService = talismaneServiceLocator.getTokenFilterServiceLocator().getTokenFilterService();
        return this.tokenFilterService;
    }

    /**
     * Returns the pos-tag filter service, obtaining it from the Talismane
     * service locator on first use and caching it afterwards.
     */
    private PosTagFilterService getPosTagFilterService() {
        if (this.posTagFilterService != null) {
            return this.posTagFilterService;
        }
        this.posTagFilterService = talismaneServiceLocator.getPosTagFilterServiceLocator().getPosTagFilterService();
        return this.posTagFilterService;
    }

    /**
     * Sets the token filter service returned by {@link #getTokenFilterService()}.
     *
     * @param tokenFilterService the service to use; if null, the getter falls
     *        back to the service locator on its next call
     */
    public void setTokenFilterService(TokenFilterService tokenFilterService) {
        this.tokenFilterService = tokenFilterService;
    }

    /**
     * Returns the sentence detector service. If none has been set yet, it is
     * obtained from the Talismane service locator and cached for subsequent calls.
     */
    public SentenceDetectorService getSentenceDetectorService() {
        if (this.sentenceDetectorService != null) {
            return this.sentenceDetectorService;
        }
        this.sentenceDetectorService = talismaneServiceLocator
                .getSentenceDetectorServiceLocator()
                .getSentenceDetectorService();
        return this.sentenceDetectorService;
    }

    /**
     * Sets the sentence detector service returned by
     * {@link #getSentenceDetectorService()}.
     *
     * @param sentenceDetectorService the service to use; if null, the getter
     *        falls back to the service locator on its next call
     */
    public void setSentenceDetectorService(SentenceDetectorService sentenceDetectorService) {
        this.sentenceDetectorService = sentenceDetectorService;
    }

    /**
     * Returns the sentence detector feature service. If none has been set yet,
     * it is obtained from the Talismane service locator and cached for
     * subsequent calls.
     */
    public SentenceDetectorFeatureService getSentenceDetectorFeatureService() {
        if (this.sentenceDetectorFeatureService != null) {
            return this.sentenceDetectorFeatureService;
        }
        this.sentenceDetectorFeatureService = talismaneServiceLocator
                .getSentenceDetectorFeatureServiceLocator()
                .getSentenceDetectorFeatureService();
        return this.sentenceDetectorFeatureService;
    }

    /**
     * Sets the sentence detector feature service returned by
     * {@link #getSentenceDetectorFeatureService()}.
     *
     * @param sentenceDetectorFeatureService the service to use; if null, the
     *        getter falls back to the service locator on its next call
     */
    public void setSentenceDetectorFeatureService(SentenceDetectorFeatureService sentenceDetectorFeatureService) {
        this.sentenceDetectorFeatureService = sentenceDetectorFeatureService;
    }

    /**
     * Returns the machine learning service. If none has been set yet, it is
     * obtained from the Talismane service locator and cached for subsequent calls.
     */
    public MachineLearningService getMachineLearningService() {
        if (this.machineLearningService != null) {
            return this.machineLearningService;
        }
        this.machineLearningService = talismaneServiceLocator
                .getMachineLearningServiceLocator()
                .getMachineLearningService();
        return this.machineLearningService;
    }

    /**
     * Sets the machine learning service returned by
     * {@link #getMachineLearningService()}.
     *
     * @param machineLearningService the service to use; if null, the getter
     *        falls back to the service locator on its next call
     */
    public void setMachineLearningService(MachineLearningService machineLearningService) {
        this.machineLearningService = machineLearningService;
    }

    /**
     * Returns the tokeniser pattern service. If none has been set yet, it is
     * obtained from the Talismane service locator (via its token pattern
     * service locator) and cached for subsequent calls.
     */
    public TokeniserPatternService getTokeniserPatternService() {
        if (this.tokeniserPatternService != null) {
            return this.tokeniserPatternService;
        }
        this.tokeniserPatternService = talismaneServiceLocator
                .getTokenPatternServiceLocator()
                .getTokeniserPatternService();
        return this.tokeniserPatternService;
    }

    /**
     * Sets the tokeniser pattern service returned by
     * {@link #getTokeniserPatternService()}.
     *
     * @param tokeniserPatternService the service to use; if null, the getter
     *        falls back to the service locator on its next call
     */
    public void setTokeniserPatternService(TokeniserPatternService tokeniserPatternService) {
        this.tokeniserPatternService = tokeniserPatternService;
    }

    /**
     * Returns the token feature service. If none has been set yet, it is
     * obtained from the Talismane service locator and cached for subsequent calls.
     */
    public TokenFeatureService getTokenFeatureService() {
        if (this.tokenFeatureService != null) {
            return this.tokenFeatureService;
        }
        this.tokenFeatureService = talismaneServiceLocator.getTokenFeatureServiceLocator().getTokenFeatureService();
        return this.tokenFeatureService;
    }

    /**
     * Sets the token feature service returned by {@link #getTokenFeatureService()}.
     *
     * @param tokenFeatureService the service to use; if null, the getter falls
     *        back to the service locator on its next call
     */
    public void setTokenFeatureService(TokenFeatureService tokenFeatureService) {
        this.tokenFeatureService = tokenFeatureService;
    }

    /**
     * Returns the tokeniser service. If none has been set yet, it is obtained
     * from the Talismane service locator and cached for subsequent calls.
     */
    public TokeniserService getTokeniserService() {
        if (this.tokeniserService != null) {
            return this.tokeniserService;
        }
        this.tokeniserService = talismaneServiceLocator.getTokeniserServiceLocator().getTokeniserService();
        return this.tokeniserService;
    }

    /**
     * Sets the tokeniser service returned by {@link #getTokeniserService()}.
     *
     * @param tokeniserService the service to use; if null, the getter falls
     *        back to the service locator on its next call
     */
    public void setTokeniserService(TokeniserService tokeniserService) {
        this.tokeniserService = tokeniserService;
    }

    /**
     * Returns the internal Talismane service (package-private). If none has
     * been set yet, it is obtained from the Talismane service locator and
     * cached for subsequent calls.
     */
    TalismaneServiceInternal getTalismaneService() {
        if (this.talismaneService != null) {
            return this.talismaneService;
        }
        this.talismaneService = talismaneServiceLocator.getTalismaneServiceInternal();
        return this.talismaneService;
    }

    /**
     * Sets the internal Talismane service returned by
     * {@link #getTalismaneService()} (package-private).
     *
     * @param talismaneService the service to use; if null, the getter falls
     *        back to the service locator on its next call
     */
    void setTalismaneServiceInternal(TalismaneServiceInternal talismaneService) {
        this.talismaneService = talismaneService;
    }

    /**
     * Does this instance of Talismane need a sentence detector to perform the
     * requested processing?
     *
     * @return true if {@code Module.SentenceDetector} lies between the start
     *         module and the end module (inclusive), according to
     *         {@code Module.compareTo}
     */
    public boolean needsSentenceDetector() {
        if (startModule.compareTo(Module.SentenceDetector) > 0) {
            // processing starts after sentence detection
            return false;
        }
        return endModule.compareTo(Module.SentenceDetector) >= 0;
    }

    /**
     * Does this instance of Talismane need a tokeniser to perform the
     * requested processing?
     *
     * @return true if {@code Module.Tokeniser} lies between the start module
     *         and the end module (inclusive), according to {@code Module.compareTo}
     */
    public boolean needsTokeniser() {
        if (startModule.compareTo(Module.Tokeniser) > 0) {
            // processing starts after tokenisation
            return false;
        }
        return endModule.compareTo(Module.Tokeniser) >= 0;
    }

    /**
     * Does this instance of Talismane need a pos tagger to perform the
     * requested processing?
     *
     * @return true if {@code Module.PosTagger} lies between the start module
     *         and the end module (inclusive), according to {@code Module.compareTo}
     */
    public boolean needsPosTagger() {
        if (startModule.compareTo(Module.PosTagger) > 0) {
            // processing starts after pos-tagging
            return false;
        }
        return endModule.compareTo(Module.PosTagger) >= 0;
    }

    /**
     * Does this instance of Talismane need a parser to perform the requested
     * processing?
     *
     * @return true if {@code Module.Parser} lies between the start module and
     *         the end module (inclusive), according to {@code Module.compareTo}
     */
    public boolean needsParser() {
        if (startModule.compareTo(Module.Parser) > 0) {
            // processing starts after parsing
            return false;
        }
        return endModule.compareTo(Module.Parser) >= 0;
    }

    /**
     * Opens a classpath resource located under
     * {@code /com/joliciel/talismane/output/}, resolved relative to the
     * {@code Talismane} class.
     *
     * @param resource the resource name, appended to the fixed output path
     * @return a stream on the resource, or null if
     *         {@link Class#getResourceAsStream(String)} cannot find it
     */
    private static InputStream getInputStreamFromResource(String resource) {
        return Talismane.class.getResourceAsStream("/com/joliciel/talismane/output/" + resource);
    }

    /**
     * Returns the configured input file path.
     * NOTE(review): presumably null when no input file was specified - confirm
     * against the argument-parsing code.
     */
    public String getInFilePath() {
        return inFilePath;
    }

    /**
     * Returns the value of the {@code logStats} flag.
     * NOTE(review): presumably controls whether processing statistics are
     * logged - confirm where the flag is consumed.
     */
    public boolean isLogStats() {
        return logStats;
    }

    /**
     * Returns the annotated corpus reader for sentence detection. If none has
     * been set, the sentence detector service's default reader is created over
     * {@code getReader()} and cached.
     * <p>
     * The corpus reader attributes are (re)applied to the reader on every call,
     * whether it was just created or already existed.
     */
    public SentenceDetectorAnnotatedCorpusReader getSentenceCorpusReader() {
        if (this.sentenceCorpusReader == null) {
            SentenceDetectorService detectorService = this.getSentenceDetectorService();
            this.sentenceCorpusReader = detectorService.getDefaultReader(this.getReader());
        }
        this.setCorpusReaderAttributes(this.sentenceCorpusReader);
        return this.sentenceCorpusReader;
    }

    /**
     * Sets the sentence corpus reader returned by
     * {@link #getSentenceCorpusReader()}, replacing the default reader that
     * would otherwise be created on demand.
     *
     * @param sentenceCorpusReader the corpus reader to use
     */
    public void setSentenceCorpusReader(SentenceDetectorAnnotatedCorpusReader sentenceCorpusReader) {
        this.sentenceCorpusReader = sentenceCorpusReader;
    }

    /**
     * Returns the configured beam width for the tokeniser.
     */
    public int getTokeniserBeamWidth() {
        return tokeniserBeamWidth;
    }

    /**
     * Returns the configured beam width for the pos-tagger.
     */
    public int getPosTaggerBeamWidth() {
        return posTaggerBeamWidth;
    }

    /**
     * Returns the configured beam width for the parser.
     */
    public int getParserBeamWidth() {
        return parserBeamWidth;
    }

    /**
     * Returns the value of the {@code propagateTokeniserBeam} flag.
     * NOTE(review): presumably indicates whether the tokeniser's beam is passed
     * on to the next module - confirm where the flag is consumed.
     */
    public boolean isPropagateTokeniserBeam() {
        return propagateTokeniserBeam;
    }

    /**
     * Returns the value of the {@code propagateBeam} flag.
     * Note that the backing field is named {@code propagateBeam}, not
     * {@code propagatePosTaggerBeam}.
     * NOTE(review): presumably indicates whether the pos-tagger's beam is
     * passed on to the parser - confirm where the flag is consumed.
     */
    public boolean isPropagatePosTaggerBeam() {
        return propagateBeam;
    }

    /**
     * The minimum block size, in characters, to process by the sentence detector.
     * Filters are applied to a concatenation of the previous block, the current block,
     * and the next block prior to sentence detection, in order to ensure that a filter
     * which crosses block boundaries is correctly applied.
     * It is not legal to have a filter which matches text greater than a block size,
     * since this could result in a filter which stops analysis but doesn't start it
     * again correctly, or vice versa. Block size can be increased if really big filters
     * are really required. Default is 1000.
     *
     * @return the block size, in characters
     */
    public int getBlockSize() {
        return blockSize;
    }

    /**
     * Sets the block size returned by {@link #getBlockSize()}. No validation
     * is performed on the value.
     *
     * @param blockSize the block size, in characters
     */
    public void setBlockSize(int blockSize) {
        this.blockSize = blockSize;
    }

    /**
     * Returns the performance configuration file.
     * NOTE(review): presumably null when none was configured - confirm
     * against the argument-parsing code.
     */
    public File getPerformanceConfigFile() {
        return performanceConfigFile;
    }

    /**
     * Sets the performance configuration file returned by
     * {@link #getPerformanceConfigFile()}.
     *
     * @param performanceConfigFile the file to use
     */
    public void setPerformanceConfigFile(File performanceConfigFile) {
        this.performanceConfigFile = performanceConfigFile;
    }

    /**
     * Should the parser corpus reader predict the transitions or not?
     *
     * @return true if transitions should be predicted
     */
    public boolean isPredictTransitions() {
        return predictTransitions;
    }

    /**
     * Sets the flag returned by {@link #isPredictTransitions()}.
     *
     * @param predictTransitions whether transitions should be predicted
     */
    public void setPredictTransitions(boolean predictTransitions) {
        this.predictTransitions = predictTransitions;
    }

    /**
     * Returns the mode in which Talismane is to run. {@link #getTalismane()}
     * uses it to choose between a normal instance and a server instance.
     */
    public Mode getMode() {
        return mode;
    }

    /**
     * Sets the mode returned by {@link #getMode()}.
     *
     * @param mode the mode in which Talismane is to run
     */
    public void setMode(Mode mode) {
        this.mode = mode;
    }

    /**
     * Builds the Talismane instance matching the configured mode:
     * a standard instance for {@code Mode.normal}, a server instance for
     * {@code Mode.server}. Each call asks the Talismane service for an
     * instance; nothing is cached here.
     *
     * @return the Talismane instance for the current mode
     * @throws TalismaneException if the mode is null or is not one of the
     *         modes handled above
     */
    public Talismane getTalismane() {
        Mode currentMode = this.getMode();
        if (currentMode == Mode.normal) {
            return this.getTalismaneService().getTalismane(this);
        }
        if (currentMode == Mode.server) {
            return this.getTalismaneService().getTalismaneServer(this);
        }
        // Build the message null-safely: calling name() on a null mode would
        // raise a NullPointerException and hide the real problem.
        String modeName = (currentMode == null) ? "null" : currentMode.name();
        throw new TalismaneException("Unknown mode: " + modeName);
    }

    /**
     * Assembles the training parameters for the configured machine learning
     * algorithm, keyed by the parameter enum names of the matching trainer.
     * <ul>
     * <li>MaxEnt: iterations and cutoff.</li>
     * <li>Perceptron / PerceptronRanking: iterations, cutoff, average-at-intervals,
     * plus tolerance when it is &gt;= 0.</li>
     * <li>LinearSVM: cutoff, plus solver type when non-null, and constraint
     * violation cost and epsilon when each is &gt;= 0.</li>
     * </ul>
     *
     * @return a new map on each call; empty if the algorithm matches none of
     *         the cases above
     */
    public Map<String, Object> getTrainParameters() {
        Map<String, Object> params = new HashMap<String, Object>();

        if (algorithm == MachineLearningAlgorithm.MaxEnt) {
            params.put(MaxentModelTrainer.MaxentModelParameter.Iterations.name(), iterations);
            params.put(MaxentModelTrainer.MaxentModelParameter.Cutoff.name(), cutoff);
            return params;
        }

        boolean perceptronFamily = algorithm == MachineLearningAlgorithm.Perceptron
                || algorithm == MachineLearningAlgorithm.PerceptronRanking;
        if (perceptronFamily) {
            params.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.Iterations.name(),
                    iterations);
            params.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.Cutoff.name(),
                    cutoff);
            params.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.AverageAtIntervals.name(),
                    averageAtIntervals);
            // a negative tolerance means "not specified": leave the trainer default
            if (perceptronTolerance >= 0) {
                params.put(PerceptronClassificationModelTrainer.PerceptronModelParameter.Tolerance.name(),
                        perceptronTolerance);
            }
            return params;
        }

        if (algorithm == MachineLearningAlgorithm.LinearSVM) {
            params.put(LinearSVMModelTrainer.LinearSVMModelParameter.Cutoff.name(), cutoff);
            // optional settings are only passed on when they were specified
            if (solverType != null) {
                params.put(LinearSVMModelTrainer.LinearSVMModelParameter.SolverType.name(), solverType);
            }
            if (constraintViolationCost >= 0) {
                params.put(LinearSVMModelTrainer.LinearSVMModelParameter.ConstraintViolationCost.name(),
                        constraintViolationCost);
            }
            if (epsilon >= 0) {
                params.put(LinearSVMModelTrainer.LinearSVMModelParameter.Epsilon.name(), epsilon);
            }
        }

        return params;
    }

    /**
     * Returns the descriptor map, creating an empty one on first access.
     * The returned map is the internal instance, not a copy.
     */
    public Map<String, List<String>> getDescriptors() {
        if (this.descriptors != null) {
            return this.descriptors;
        }
        this.descriptors = new HashMap<String, List<String>>();
        return this.descriptors;
    }

    /**
     * Returns the configured machine learning algorithm.
     * {@link #getTrainParameters()} selects its parameters based on this value.
     */
    public MachineLearningAlgorithm getAlgorithm() {
        return algorithm;
    }

    /**
     * Returns the external resource finder held by this configuration.
     * The field is returned as-is; no instance is created here.
     * NOTE(review): may be null if no external resources were configured - confirm.
     */
    public ExternalResourceFinder getExternalResourceFinder() {
        return externalResourceFinder;
    }

    /**
     * Returns the configured perceptron observation points.
     * The internal list is returned directly, not a copy.
     * NOTE(review): presumably iteration counts at which the perceptron model
     * is observed during training; may be null if not configured - confirm.
     */
    public List<Integer> getPerceptronObservationPoints() {
        return perceptronObservationPoints;
    }

    /**
     * Returns the parsing constrainer, loading it on first access from the
     * file at {@code parsingConstrainerPath} via the parser service, and
     * caching it afterwards.
     *
     * @return the parsing constrainer
     * @throws RuntimeException if no constrainer is cached and no
     *         parsing constrainer path has been configured
     */
    public ParsingConstrainer getParsingConstrainer() {
        if (parsingConstrainer == null) {
            if (parsingConstrainerPath == null) {
                throw new RuntimeException("Missing argument: parsingConstrainer");
            }
            // Go through the lazy getter rather than the raw field: the
            // parserService field stays null until getParserService() or
            // setParserService() has been called.
            parsingConstrainer = this.getParserService().getParsingConstrainer(new File(parsingConstrainerPath));
        }
        return parsingConstrainer;
    }

    /**
     * Returns the configured path to the pos-tagger model file.
     * May be null: other code in this class null-checks this field before use.
     */
    public String getPosTaggerModelFilePath() {
        return posTaggerModelFilePath;
    }

    /**
     * Returns the configured path to the tokeniser model file.
     * May be null: other code in this class null-checks this field before use.
     */
    public String getTokeniserModelFilePath() {
        return tokeniserModelFilePath;
    }

    /**
     * Returns the configured path to the sentence detector model file.
     * NOTE(review): presumably null when no sentence model was configured - confirm.
     */
    public String getSentenceModelFilePath() {
        return sentenceModelFilePath;
    }

    /**
     * Returns the configured path to the parser model file.
     * May be null: other code in this class null-checks this field before use.
     */
    public String getParserModelFilePath() {
        return parserModelFilePath;
    }

    /**
     * Returns the configured pattern tokeniser type.
     * NOTE(review): presumably selects which pattern tokeniser implementation
     * is built - confirm where the value is consumed.
     */
    public PatternTokeniserType getPatternTokeniserType() {
        return patternTokeniserType;
    }

}