Example usage for java.util.Scanner hasNextLine()

List of usage examples for java.util.Scanner hasNextLine()

Introduction

On this page you can find example usages of java.util.Scanner hasNextLine().

Prototype

public boolean hasNextLine() 

Source Link

Document

Returns true if there is another line in the input of this scanner.

Usage

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * The rules to apply when running the parser.
 * @return/*from ww  w.  j  a  v  a2  s  .  com*/
 */
public List<ParserRule> getParserRules() {
    try {
        if (parserRules == null) {
            parserRules = new ArrayList<ParserRule>();
            if (parserRuleFilePath != null && parserRuleFilePath.equalsIgnoreCase("null")) {
                // add no rules! (not even built-in ones)
            } else {
                for (int i = 0; i <= 1; i++) {
                    Scanner rulesScanner = null;
                    if (i == 0) {
                        if (parserRulesReplace)
                            continue;
                        rulesScanner = this.implementation.getDefaultParserRulesScanner();
                    } else {
                        if (parserRuleFilePath != null && parserRuleFilePath.length() > 0) {
                            File parserRuleFile = new File(parserRuleFilePath);
                            rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                    new FileInputStream(parserRuleFile), this.getInputCharset().name())));
                        }
                    }

                    if (rulesScanner != null) {
                        List<String> ruleDescriptors = new ArrayList<String>();
                        while (rulesScanner.hasNextLine()) {
                            String ruleDescriptor = rulesScanner.nextLine();
                            if (ruleDescriptor.length() > 0) {
                                ruleDescriptors.add(ruleDescriptor);
                                LOG.trace(ruleDescriptor);
                            }
                        }
                        List<ParserRule> rules = this.getParserFeatureService().getRules(ruleDescriptors,
                                dynamiseFeatures);
                        parserRules.addAll(rules);

                    }
                }
            }
        }
        return parserRules;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.extensions.Extensions.java

/**
 * To be called just before running the Talismane command, to
 * prepare anything specifically required for extensions to function correctly.
 * @param config/* www  .  j a  va 2s .c  om*/
 * @param talismane
 */
public void prepareCommand(TalismaneConfig config, Talismane talismane) {
    try {
        if (command == null)
            return;

        TalismaneSession talismaneSession = config.getTalismaneService().getTalismaneSession();

        switch (command) {
        case toStandoff: {
            StandoffWriter standoffWriter = new StandoffWriter();
            talismane.setParseConfigurationProcessor(standoffWriter);
            break;
        }
        case toStandoffSentences: {
            InputStream inputStream = StandoffWriter.class.getResourceAsStream("standoffSentences.ftl");
            Reader templateReader = new BufferedReader(new InputStreamReader(inputStream));
            FreemarkerTemplateWriter templateWriter = new FreemarkerTemplateWriter(templateReader);

            talismane.setParseConfigurationProcessor(templateWriter);
            break;
        }
        case fromStandoff: {
            Scanner scanner = new Scanner(config.getReader());
            StandoffReader standoffReader = new StandoffReader(talismaneSession, scanner);
            standoffReader.setParserService(config.getParserService());
            standoffReader.setPosTaggerService(config.getPosTaggerService());
            standoffReader.setTokeniserService(config.getTokeniserService());
            standoffReader.setTokenFilterService(config.getTokenFilterService());

            config.setParserCorpusReader(standoffReader);
            break;
        }
        case corpusStatistics: {
            CorpusStatistics stats = new CorpusStatistics(talismaneSession);

            if (referenceStatsPath != null) {
                File referenceStatsFile = new File(referenceStatsPath);
                CorpusStatistics referenceStats = CorpusStatistics.loadFromFile(referenceStatsFile);
                stats.setReferenceWords(referenceStats.getWords());
                stats.setReferenceLowercaseWords(referenceStats.getLowerCaseWords());
            }

            File csvFile = new File(config.getOutDir(), config.getBaseName() + "_stats.csv");
            csvFile.delete();
            csvFile.createNewFile();
            Writer csvFileWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8"));
            stats.setWriter(csvFileWriter);

            File serializationFile = new File(config.getOutDir(), config.getBaseName() + "_stats.zip");
            serializationFile.delete();
            stats.setSerializationFile(serializationFile);

            ParserRegexBasedCorpusReader corpusReader = (ParserRegexBasedCorpusReader) config
                    .getParserCorpusReader();
            corpusReader.setPredictTransitions(false);

            talismane.setParseConfigurationProcessor(stats);
            break;
        }
        case posTaggerStatistics: {
            PosTaggerStatistics stats = new PosTaggerStatistics(talismaneSession);

            if (referenceStatsPath != null) {
                File referenceStatsFile = new File(referenceStatsPath);
                PosTaggerStatistics referenceStats = PosTaggerStatistics.loadFromFile(referenceStatsFile);
                stats.setReferenceWords(referenceStats.getWords());
                stats.setReferenceLowercaseWords(referenceStats.getLowerCaseWords());
            }

            File csvFile = new File(config.getOutDir(), config.getBaseName() + "_stats.csv");
            csvFile.delete();
            csvFile.createNewFile();
            Writer csvFileWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8"));
            stats.setWriter(csvFileWriter);

            File serializationFile = new File(config.getOutDir(), config.getBaseName() + "_stats.zip");
            serializationFile.delete();
            stats.setSerializationFile(serializationFile);

            talismane.setPosTagSequenceProcessor(stats);
            break;
        }
        case modifyCorpus: {
            if (corpusRulesPath == null)
                throw new TalismaneException("corpusRules is required for modifyCorpus command");

            List<String> corpusRules = new ArrayList<String>();
            File corpusRulesFile = new File(corpusRulesPath);
            Scanner scanner = new Scanner(
                    new BufferedReader(new InputStreamReader(new FileInputStream(corpusRulesFile), "UTF-8")));

            while (scanner.hasNextLine()) {
                corpusRules.add(scanner.nextLine());
            }
            CorpusModifier corpusModifier = new CorpusModifier(config.getParseConfigurationProcessor(),
                    corpusRules);
            talismane.setParseConfigurationProcessor(corpusModifier);
            break;
        }
        case projectify: {
            CorpusProjectifier projectifier = new CorpusProjectifier(config.getParseConfigurationProcessor());
            talismane.setParseConfigurationProcessor(projectifier);
            break;
        }
        default: {
            throw new RuntimeException("Unknown command: " + command);
        }
        }
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

public void loadParameters(Map<String, String> args) {
    try {
        if (args.size() == 0) {
            System.out.println("Talismane usage instructions: ");
            System.out.println("* indicates optional, + indicates default value");
            System.out.println("");
            System.out.println(
                    "Usage: command=analyse *startModule=[sentence+|tokenise|postag|parse] *endModule=[sentence|tokenise|postag|parse+] *inFile=[inFilePath, stdin if missing] *outFile=[outFilePath, stdout if missing] *template=[outputTemplatePath]");
            System.out.println("");
            System.out.println("Additional optional parameters:");
            System.out.println(
                    " *encoding=[UTF-8, ...] *includeDetails=[true|false+] posTaggerRules*=[posTaggerRuleFilePath] textFilters*=[regexFilterFilePath] *sentenceModel=[path] *tokeniserModel=[path] *posTaggerModel=[path] *parserModel=[path] *inputPatternFile=[inputPatternFilePath] *posTagSet=[posTagSetPath]");
            return;
        }

        String logConfigPath = args.get("logConfigFile");
        if (logConfigPath != null) {
            args.remove("logConfigFile");
            Properties props = new Properties();
            props.load(new FileInputStream(logConfigPath));
            PropertyConfigurator.configure(props);
        }

        String performanceConifPath = args.get("performanceConfigFile");
        if (performanceConifPath != null) {
            args.remove("performanceConfigFile");
            performanceConfigFile = this.getFile(performanceConifPath);
        }

        String encoding = null;
        String inputEncoding = null;
        String outputEncoding = null;
        String builtInTemplate = null;

        String posTagSetPath = null;
        String externalResourcePath = null;
        String transitionSystemStr = null;

        String languagePackPath = null;

        for (Entry<String, String> arg : args.entrySet()) {
            String argName = arg.getKey();
            String argValue = arg.getValue();
            if (argName.equals("command")) {
                String commandString = argValue;
                if (commandString.equals("analyze"))
                    commandString = "analyse";

                command = Command.valueOf(commandString);
            } else if (argName.equals("option")) {
                option = Option.valueOf(argValue);
            } else if (argName.equals("mode")) {
                mode = Mode.valueOf(argValue);
            } else if (argName.equals("module")) {
                if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                    module = Talismane.Module.SentenceDetector;
                else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                    module = Talismane.Module.Tokeniser;
                else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                    module = Talismane.Module.PosTagger;
                else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                    module = Talismane.Module.Parser;
                else if (argValue.equalsIgnoreCase("language") || argValue.equalsIgnoreCase("languageDetector"))
                    module = Talismane.Module.LanguageDetector;
                else
                    throw new TalismaneException("Unknown module: " + argValue);
            } else if (argName.equals("startModule")) {
                if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                    startModule = Talismane.Module.SentenceDetector;
                else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                    startModule = Talismane.Module.Tokeniser;
                else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                    startModule = Talismane.Module.PosTagger;
                else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                    startModule = Talismane.Module.Parser;
                else
                    throw new TalismaneException("Unknown startModule: " + argValue);
            } else if (argName.equals("endModule")) {
                if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                    endModule = Talismane.Module.SentenceDetector;
                else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                    endModule = Talismane.Module.Tokeniser;
                else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                    endModule = Talismane.Module.PosTagger;
                else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                    endModule = Talismane.Module.Parser;
                else
                    throw new TalismaneException("Unknown endModule: " + argValue);
            } else if (argName.equals("inFile"))
                inFilePath = argValue;
            else if (argName.equals("inDir"))
                inDirPath = argValue;
            else if (argName.equals("outFile"))
                outFilePath = argValue;
            else if (argName.equals("outDir"))
                outDirPath = argValue;
            else if (argName.equals("template"))
                templatePath = argValue;
            else if (argName.equals("builtInTemplate"))
                builtInTemplate = argValue;
            else if (argName.equals("encoding")) {
                if (inputEncoding != null || outputEncoding != null)
                    throw new TalismaneException(
                            "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                encoding = argValue;
            } else if (argName.equals("inputEncoding")) {
                if (encoding != null)
                    throw new TalismaneException(
                            "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                inputEncoding = argValue;
            } else if (argName.equals("outputEncoding")) {
                if (encoding != null)
                    throw new TalismaneException(
                            "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
                outputEncoding = argValue;
            } else if (argName.equals("includeDetails"))
                includeDetails = argValue.equalsIgnoreCase("true");
            else if (argName.equals("propagateBeam"))
                propagateBeam = argValue.equalsIgnoreCase("true");
            else if (argName.equals("beamWidth"))
                beamWidth = Integer.parseInt(argValue);
            else if (argName.equals("languageModel"))
                languageModelFilePath = argValue;
            else if (argName.equals("sentenceModel"))
                sentenceModelFilePath = argValue;
            else if (argName.equals("tokeniserModel"))
                tokeniserModelFilePath = argValue;
            else if (argName.equals("posTaggerModel"))
                posTaggerModelFilePath = argValue;
            else if (argName.equals("parserModel"))
                parserModelFilePath = argValue;
            else if (argName.equals("inputPatternFile"))
                inputPatternFilePath = argValue;
            else if (argName.equals("inputPattern"))
                inputRegex = argValue;
            else if (argName.equals("evaluationPatternFile"))
                evaluationPatternFilePath = argValue;
            else if (argName.equals("evaluationPattern"))
                evaluationRegex = argValue;
            else if (argName.equals("posTaggerRules")) {
                if (argValue.startsWith("replace:")) {
                    posTaggerRulesReplace = true;
                    posTaggerRuleFilePath = argValue.substring("replace:".length());
                } else {
                    posTaggerRuleFilePath = argValue;
                }
            } else if (argName.equals("parserRules")) {
                if (argValue.startsWith("replace:")) {
                    parserRulesReplace = true;
                    parserRuleFilePath = argValue.substring("replace:".length());
                } else {
                    parserRuleFilePath = argValue;
                }
            } else if (argName.equals("posTagSet"))
                posTagSetPath = argValue;
            else if (argName.equals("textFilters")) {
                if (argValue.startsWith("replace:")) {
                    textFiltersReplace = true;
                    textFiltersPath = argValue.substring("replace:".length());
                } else {
                    textFiltersPath = argValue;
                }
            } else if (argName.equals("tokenFilters")) {
                if (argValue.startsWith("replace:")) {
                    tokenFiltersReplace = true;
                    tokenFiltersPath = argValue.substring("replace:".length());
                } else {
                    tokenFiltersPath = argValue;
                }
            } else if (argName.equals("tokenSequenceFilters")) {
                if (argValue.startsWith("replace:")) {
                    tokenSequenceFiltersReplace = true;
                    tokenSequenceFilterPath = argValue.substring("replace:".length());
                } else {
                    tokenSequenceFilterPath = argValue;
                }
            } else if (argName.equals("posTagSequenceFilters"))
                posTagSequenceFilterPath = argValue;
            else if (argName.equals("logStats"))
                logStats = argValue.equalsIgnoreCase("true");
            else if (argName.equals("newline"))
                newlineMarker = MarkerFilterType.valueOf(argValue);
            else if (argName.equals("fileName"))
                fileName = argValue;
            else if (argName.equals("processByDefault"))
                processByDefault = argValue.equalsIgnoreCase("true");
            else if (argName.equals("maxParseAnalysisTime"))
                maxParseAnalysisTime = Integer.parseInt(argValue);
            else if (argName.equals("minFreeMemory"))
                minFreeMemory = Integer.parseInt(argValue);
            else if (argName.equals("transitionSystem"))
                transitionSystemStr = argValue;
            else if (argName.equals("sentenceCount"))
                maxSentenceCount = Integer.parseInt(argValue);
            else if (argName.equals("startSentence"))
                startSentence = Integer.parseInt(argValue);
            else if (argName.equals("endBlockCharCode"))
                endBlockCharacter = (char) Integer.parseInt(argValue);
            else if (argName.equals("outputGuesses"))
                outputGuesses = argValue.equalsIgnoreCase("true");
            else if (argName.equals("outputGuessCount"))
                outputGuessCount = Integer.parseInt(argValue);
            else if (argName.equals("suffix"))
                suffix = argValue;
            else if (argName.equals("includeDistanceFScores"))
                includeDistanceFScores = argValue.equalsIgnoreCase("true");
            else if (argName.equals("includeTransitionLog"))
                includeTransitionLog = argValue.equalsIgnoreCase("true");
            else if (argName.equals("evaluationFile"))
                evaluationFilePath = argValue;
            else if (argName.equals("labeledEvaluation"))
                labeledEvaluation = argValue.equalsIgnoreCase("true");
            else if (argName.equals("tokeniserBeamWidth"))
                tokeniserBeamWidth = Integer.parseInt(argValue);
            else if (argName.equals("posTaggerBeamWidth"))
                posTaggerBeamWidth = Integer.parseInt(argValue);
            else if (argName.equals("parserBeamWidth"))
                parserBeamWidth = Integer.parseInt(argValue);
            else if (argName.equals("propagateTokeniserBeam"))
                propagateTokeniserBeam = argValue.equalsIgnoreCase("true");
            else if (argName.equals("blockSize"))
                blockSize = Integer.parseInt(argValue);
            else if (argName.equals("crossValidationSize"))
                crossValidationSize = Integer.parseInt(argValue);
            else if (argName.equals("includeIndex"))
                includeIndex = Integer.parseInt(argValue);
            else if (argName.equals("excludeIndex"))
                excludeIndex = Integer.parseInt(argValue);
            else if (argName.equals("dynamiseFeatures"))
                dynamiseFeatures = argValue.equalsIgnoreCase("true");
            else if (argName.equals("predictTransitions"))
                predictTransitions = argValue.equalsIgnoreCase("true");
            else if (argName.equals("lexicon")) {
                if (argValue.startsWith("replace:")) {
                    replaceLexicon = true;
                    lexiconPath = argValue.substring("replace:".length());
                } else {
                    lexiconPath = argValue;
                }
            } else if (argName.equals("perceptronScoring")) {
                PerceptronScoring perceptronScoring = PerceptronScoring.valueOf(argValue);
                MachineLearningSession.setPerceptronScoring(perceptronScoring);
            } else if (argName.equals("parseComparisonStrategy")) {
                parseComparisonStrategyType = ParseComparisonStrategyType.valueOf(argValue);
            } else if (argName.equals("sentenceReader")) {
                sentenceReaderPath = argValue;
            } else if (argName.equals("skipLabel")) {
                skipLabel = argValue;
            } else if (argName.equals("errorLabels")) {
                errorLabels = new HashSet<String>();
                String[] labels = argValue.split(",");
                for (String label : labels) {
                    errorLabels.add(label);
                }
            } else if (argName.equals("earlyStop")) {
                earlyStop = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("languageFeatures")) {
                languageFeaturePath = argValue;
            } else if (argName.equals("sentenceFeatures")) {
                sentenceFeaturePath = argValue;
            } else if (argName.equals("tokeniserFeatures")) {
                tokeniserFeaturePath = argValue;
            } else if (argName.equals("tokeniserPatterns")) {
                tokeniserPatternFilePath = argValue;
            } else if (argName.equals("posTaggerFeatures")) {
                posTaggerFeaturePath = argValue;
            } else if (argName.equals("parserFeatures")) {
                parserFeaturePath = argValue;
            } else if (argName.equals("externalResources")) {
                externalResourcePath = argValue;
            } else if (argName.equals("testWords")) {
                String[] parts = argValue.split(";");
                testWords = new HashSet<String>();
                for (String part : parts)
                    testWords.add(part);
            } else if (argName.equals("includeLexiconCoverage")) {
                includeLexiconCoverage = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("includeUnknownWordResults")) {
                includeUnknownWordResults = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("iterations"))
                iterations = Integer.parseInt(argValue);
            else if (argName.equals("cutoff"))
                cutoff = Integer.parseInt(argValue);
            else if (argName.equals("dependencyLabels"))
                dependencyLabelPath = argValue;
            else if (argName.equals("parsingConstrainer"))
                parsingConstrainerPath = argValue;
            else if (argName.equals("algorithm"))
                algorithm = MachineLearningAlgorithm.valueOf(argValue);
            else if (argName.equals("linearSVMSolver"))
                solverType = LinearSVMSolverType.valueOf(argValue);
            else if (argName.equals("linearSVMCost"))
                constraintViolationCost = Double.parseDouble(argValue);
            else if (argName.equals("linearSVMEpsilon"))
                epsilon = Double.parseDouble(argValue);
            else if (argName.equals("perceptronTolerance"))
                perceptronTolerance = Double.parseDouble(argValue);
            else if (argName.equals("averageAtIntervals"))
                averageAtIntervals = argValue.equalsIgnoreCase("true");
            else if (argName.equals("perceptronObservationPoints")) {
                String[] points = argValue.split(",");
                perceptronObservationPoints = new ArrayListNoNulls<Integer>();
                for (String point : points)
                    perceptronObservationPoints.add(Integer.parseInt(point));
            } else if (argName.equals("tokeniserType")) {
                tokeniserType = TokeniserType.valueOf(argValue);
            } else if (argName.equals("patternTokeniser"))
                patternTokeniserType = PatternTokeniserType.valueOf(argValue);
            else if (argName.equals("excludeFile")) {
                excludeFileName = argValue;
            } else if (argName.equals("port")) {
                port = Integer.parseInt(argValue);
            } else if (argName.equals("preloadLexicon")) {
                preloadLexicon = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("locale")) {
                locale = Locale.forLanguageTag(argValue);
            } else if (argName.equals("languageCorpusMap")) {
                languageCorpusMapPath = argValue;
            } else if (argName.equals("corpusLexicalEntryRegex")) {
                corpusLexicalEntryRegexPath = argValue;
            } else if (argName.equals("languagePack")) {
                languagePackPath = argValue;
            } else {
                System.out.println("Unknown argument: " + argName);
                throw new RuntimeException("Unknown argument: " + argName);
            }
        }

        if (command == null)
            throw new TalismaneException("No command provided.");

        if (!(implementation instanceof LanguagePackImplementation) && languagePackPath != null)
            throw new TalismaneException("The implementation " + implementation.getClass().getSimpleName()
                    + " does not accept language packs");

        if (implementation instanceof LanguagePackImplementation) {
            if (languagePackPath != null) {
                File languagePackFile = this.getFile(languagePackPath);
                if (!languagePackFile.exists())
                    throw new TalismaneException(
                            "languagePack: could not find file: " + languagePackFile.getPath());

                LOG.debug("Setting language pack to " + languagePackFile.getPath());
                ((LanguagePackImplementation) implementation).setLanguagePack(languagePackFile);
            }
        }

        if (command.equals(Command.evaluate)) {
            if (outDirPath.length() == 0)
                throw new TalismaneException("Missing argument: outdir");
        }

        if (startModule == null)
            startModule = module;
        if (startModule == null)
            startModule = Module.SentenceDetector;
        if (endModule == null)
            endModule = module;
        if (endModule == null)
            endModule = Module.Parser;
        if (module == null)
            module = endModule;

        if (command == Command.train) {
            if (module == Module.LanguageDetector) {
                if (languageModelFilePath == null)
                    throw new TalismaneException(
                            "languageModel is required when training a language detector model");
                if (languageCorpusMapPath == null)
                    throw new TalismaneException(
                            "languageCorpusMap is required when training a language detector model");
                if (languageFeaturePath == null)
                    throw new TalismaneException(
                            "languageFeatures is required when training a language detector model");
            } else if (module == Module.SentenceDetector) {
                if (sentenceModelFilePath == null)
                    throw new TalismaneException(
                            "sentenceModel is required when training a sentence detector model");
                if (sentenceFeaturePath == null)
                    throw new TalismaneException(
                            "sentenceFeatures is required when training a sentence detector model");
            } else if (module == Module.Tokeniser) {
                if (tokeniserModelFilePath == null)
                    throw new TalismaneException("tokeniserModel is required when training a tokeniser model");
                if (tokeniserFeaturePath == null)
                    throw new TalismaneException(
                            "tokeniserFeatures is required when training a tokeniser model");
            } else if (module == Module.PosTagger) {
                if (posTaggerModelFilePath == null)
                    throw new TalismaneException("posTaggerModel is required when training a posTagger model");
                if (posTaggerFeaturePath == null)
                    throw new TalismaneException(
                            "posTaggerFeatures is required when training a posTagger model");
            } else if (module == Module.Parser) {
                this.predictTransitions = true;

                if (parserModelFilePath == null)
                    throw new TalismaneException("parserModel is required when training a parser model");
                if (parserFeaturePath == null)
                    throw new TalismaneException("parserFeatures is required when training a parser model");
            }
        }

        if (builtInTemplate != null) {
            if (builtInTemplate.equalsIgnoreCase("with_location")) {
                tokeniserTemplateName = "tokeniser_template_with_location.ftl";
                posTaggerTemplateName = "posTagger_template_with_location.ftl";
                parserTemplateName = "parser_conll_template_with_location.ftl";
            } else if (builtInTemplate.equalsIgnoreCase("with_prob")) {
                tokeniserTemplateName = "tokeniser_template_with_prob.ftl";
                posTaggerTemplateName = "posTagger_template_with_prob.ftl";
                parserTemplateName = "parser_conll_template_with_prob.ftl";
            } else if (builtInTemplate.equalsIgnoreCase("with_comments")) {
                posTaggerTemplateName = "posTagger_template_with_comments.ftl";
                parserTemplateName = "parser_conll_template_with_comments.ftl";
            } else {
                throw new TalismaneException("Unknown builtInTemplate: " + builtInTemplate);
            }
        }

        if (posTaggerBeamWidth < 0)
            posTaggerBeamWidth = beamWidth;
        if (parserBeamWidth < 0)
            parserBeamWidth = beamWidth;

        inputCharset = Charset.defaultCharset();
        outputCharset = Charset.defaultCharset();
        if (encoding != null) {
            inputCharset = Charset.forName(encoding);
            outputCharset = Charset.forName(encoding);
        } else {
            if (inputEncoding != null)
                inputCharset = Charset.forName(inputEncoding);
            if (outputEncoding != null)
                outputCharset = Charset.forName(outputEncoding);
        }

        if (fileName == null && inFilePath != null) {
            fileName = inFilePath;
        }

        if (posTagSetPath != null) {
            File posTagSetFile = this.getFile(posTagSetPath);
            Scanner posTagSetScanner = new Scanner(new BufferedReader(
                    new InputStreamReader(new FileInputStream(posTagSetFile), this.getInputCharset().name())));

            PosTagSet posTagSet = this.getPosTaggerService().getPosTagSet(posTagSetScanner);
            talismaneSession.setPosTagSet(posTagSet);
        }

        if (transitionSystemStr != null) {
            TransitionSystem transitionSystem = null;
            if (transitionSystemStr.equalsIgnoreCase("ShiftReduce")) {
                transitionSystem = this.getParserService().getShiftReduceTransitionSystem();
            } else if (transitionSystemStr.equalsIgnoreCase("ArcEager")) {
                transitionSystem = this.getParserService().getArcEagerTransitionSystem();
            } else {
                throw new TalismaneException("Unknown transition system: " + transitionSystemStr);
            }

            if (dependencyLabelPath != null) {
                File dependencyLabelFile = this.getFile(dependencyLabelPath);
                Scanner depLabelScanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(dependencyLabelFile), "UTF-8")));
                List<String> dependencyLabels = new ArrayListNoNulls<String>();
                while (depLabelScanner.hasNextLine()) {
                    String dependencyLabel = depLabelScanner.nextLine();
                    if (!dependencyLabel.startsWith("#"))
                        dependencyLabels.add(dependencyLabel);
                }
                transitionSystem.setDependencyLabels(dependencyLabels);
            }

            talismaneSession.setTransitionSystem(transitionSystem);
        }

        if (this.lexiconPath != null) {
            File lexiconFile = this.getFile(lexiconPath);
            if (!lexiconFile.exists())
                throw new TalismaneException("lexicon: File " + lexiconPath + " does not exist");

            LexiconDeserializer lexiconDeserializer = new LexiconDeserializer(talismaneSession);
            List<PosTaggerLexicon> lexicons = lexiconDeserializer.deserializeLexicons(lexiconFile);
            for (PosTaggerLexicon oneLexicon : lexicons) {
                talismaneSession.addLexicon(oneLexicon);
            }

            if (!replaceLexicon) {
                List<PosTaggerLexicon> defaultLexicons = this.implementation.getDefaultLexicons();
                if (defaultLexicons != null) {
                    for (PosTaggerLexicon oneLexicon : defaultLexicons) {
                        talismaneSession.addLexicon(oneLexicon);
                    }
                }
            }
        }

        if (externalResourcePath != null) {
            externalResourceFinder = this.getMachineLearningService().getExternalResourceFinder();

            List<String> paths = new ArrayListNoNulls<String>();
            if (externalResourcePath != null && externalResourcePath.length() > 0) {
                LOG.info("externalResourcePath: " + externalResourcePath);
                String[] parts = externalResourcePath.split(";");
                for (String part : parts)
                    paths.add(part);
            }

            for (String path : paths) {
                LOG.info("Reading external resources from " + path);
                if (path.length() > 0) {
                    File externalResourceFile = this.getFile(path);
                    externalResourceFinder.addExternalResources(externalResourceFile);
                }
            }

            ExternalResourceFinder parserResourceFinder = this.getParserFeatureService()
                    .getExternalResourceFinder();
            ExternalResourceFinder posTaggerResourceFinder = this.getPosTaggerFeatureService()
                    .getExternalResourceFinder();
            ExternalResourceFinder tokeniserResourceFinder = this.getTokenFeatureService()
                    .getExternalResourceFinder();
            ExternalResourceFinder sentenceResourceFinder = this.getSentenceDetectorFeatureService()
                    .getExternalResourceFinder();
            for (ExternalResource<?> externalResource : externalResourceFinder.getExternalResources()) {
                parserResourceFinder.addExternalResource(externalResource);
                posTaggerResourceFinder.addExternalResource(externalResource);
                tokeniserResourceFinder.addExternalResource(externalResource);
                sentenceResourceFinder.addExternalResource(externalResource);
            }

            ExternalResourceFinder tokenFilterResourceFinder = this.getTokenFilterService()
                    .getExternalResourceFinder();
            for (ExternalWordList externalWordList : externalResourceFinder.getExternalWordLists()) {
                tokenFilterResourceFinder.addExternalWordList(externalWordList);
            }
        }
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the text marker filters, building and caching them on the first call.
 * <p>
 * Text marker filters are applied to raw text segments extracted from the stream, 3 segments at a time.
 * This means that if a particular marker crosses segment borders, it is handled correctly.
 * <p>
 * The list is assembled in this order: a sentence-break filter on the end-of-block character,
 * the newline filter selected by <code>newlineMarker</code>, the duplicate white-space filter,
 * the filters described in each file listed in <code>textFiltersPath</code> (semicolon-separated),
 * and finally the implementation's default filters unless <code>textFiltersReplace</code> is set.
 *
 * @return the cached list of text marker filters
 * @throws RuntimeException wrapping any exception raised while building the list
 */
@Override
public List<TextMarkerFilter> getTextMarkerFilters() {
    try {
        if (textMarkerFilters == null) {
            textMarkerFilters = new ArrayListNoNulls<TextMarkerFilter>();

            // insert sentence breaks at end of block
            this.addTextMarkerFilter(this.getFilterService().getRegexMarkerFilter(
                    new MarkerFilterType[] { MarkerFilterType.SENTENCE_BREAK }, "" + endBlockCharacter,
                    blockSize));

            // handle newline as requested
            if (newlineMarker.equals(MarkerFilterType.SENTENCE_BREAK))
                this.addTextMarkerFilter(this.getFilterService().getNewlineEndOfSentenceMarker());
            else if (newlineMarker.equals(MarkerFilterType.SPACE))
                this.addTextMarkerFilter(this.getFilterService().getNewlineSpaceMarker());

            // get rid of duplicate white-space always
            this.addTextMarkerFilter(this.getFilterService().getDuplicateWhiteSpaceFilter());

            List<String> paths = new ArrayListNoNulls<String>();
            if (textFiltersPath != null && textFiltersPath.length() > 0) {
                LOG.debug("textFiltersPath: " + textFiltersPath);
                String[] parts = textFiltersPath.split(";");
                for (String part : parts)
                    paths.add(part);
            }
            if (!textFiltersReplace) {
                // default text filter path: the empty string stands for "use the built-in defaults"
                paths.add("");
            }

            for (String path : paths) {
                LOG.debug("Text marker filters");
                Scanner textFilterScanner = null;
                try {
                    if (path.length() > 0) {
                        LOG.debug("From: " + path);
                        File textFilterFile = this.getFile(path);
                        if (!textFilterFile.exists()) {
                            throw new TalismaneException("textFilters: File " + path + " does not exist");
                        }
                        textFilterScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                new FileInputStream(textFilterFile), this.getInputCharset().name())));
                    } else {
                        LOG.debug("From default");
                        textFilterScanner = this.implementation.getDefaultTextMarkerFiltersScanner();
                    }
                    if (textFilterScanner != null) {
                        while (textFilterScanner.hasNextLine()) {
                            String descriptor = textFilterScanner.nextLine();
                            LOG.debug(descriptor);
                            // blank lines and lines starting with '#' carry no filter descriptor
                            if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                                TextMarkerFilter textMarkerFilter = this.getFilterService()
                                        .getTextMarkerFilter(descriptor, blockSize);
                                this.addTextMarkerFilter(textMarkerFilter);
                            }
                        }
                    }
                } finally {
                    // release the underlying file handle even if a descriptor fails to parse
                    if (textFilterScanner != null)
                        textFilterScanner.close();
                }
            }

        }
        return textMarkerFilters;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:au.org.emii.portal.composer.MapComposer.java

/**
 * Restores a previously saved user session from the session folder named after the given id.
 * <p>
 * The folder is located under the configured analysis output directory, in <code>session/</code>.
 * Its <code>details.txt</code> supplies the map bounding box on the first line (comma-separated,
 * fields 1 to 4 are min longitude, min latitude, max longitude, max latitude). The objects
 * persisted in the folder are then read back through XStream, in reverse order, and each
 * {@link MapLayer} or {@link ScatterplotDataDTO} found is re-added to the map.
 * <p>
 * Any failure is reported to the user with a generic message, logged, and its stack trace is
 * written on a best-effort basis to <code>/data/sessions/&lt;sessionid&gt;.txt</code>. A marker
 * file <code>/data/sessions/ok/&lt;sessionid&gt;.txt</code> is written whenever the method finishes,
 * whether or not loading succeeded.
 *
 * @param sessionid the id of the session to restore; used to build file system paths
 */
public void loadUserSession(String sessionid) {
    Scanner scanner = null;
    try {

        String sfld = getSettingsSupplementary().getProperty(StringConstants.ANALYSIS_OUTPUT_DIR) + "session/"
                + sessionid;

        File sessfolder = new File(sfld);
        if (!sessfolder.exists()) {
            showMessage("Session information does not exist. Please provide a valid session id");
            return;
        }

        scanner = new Scanner(new File(sfld + "/details.txt"));

        // first grab the zoom level and bounding box
        // NOTE(review): field 0 is presumably the zoom level; it is not used here
        String[] mapdetails = scanner.nextLine().split(",");

        BoundingBox bb = new BoundingBox();
        bb.setMinLongitude(Float.parseFloat(mapdetails[1]));
        bb.setMinLatitude(Float.parseFloat(mapdetails[2]));
        bb.setMaxLongitude(Float.parseFloat(mapdetails[3]));
        bb.setMaxLatitude(Float.parseFloat(mapdetails[4]));
        openLayersJavascript.setAdditionalScript(openLayersJavascript.zoomToBoundingBox(bb, true));

        // NOTE(review): scatterplotNames is parsed and reversed but never used below;
        // restored scatterplots are named "My Scatterplot N" instead - confirm the intent.
        String scatterplotPrefix = "scatterplotNames";
        String[] scatterplotNames = null;
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            // require at least the separator after the prefix, so that a bare
            // "scatterplotNames" line no longer aborts the whole restore
            if (line.startsWith(scatterplotPrefix) && line.length() > scatterplotPrefix.length()) {
                scatterplotNames = line.substring(scatterplotPrefix.length() + 1).split("___");
            }
        }
        ArrayUtils.reverse(scatterplotNames);

        // ignore fields not found
        XStream xstream = new XStream(new DomDriver()) {

            protected MapperWrapper wrapMapper(MapperWrapper next) {
                return new MapperWrapper(next) {
                    public boolean shouldSerializeMember(Class definedIn, String fieldName) {
                        if (definedIn == Object.class || !super.shouldSerializeMember(definedIn, fieldName))
                            System.out.println("faled to read: " + definedIn + ", " + fieldName);

                        return definedIn != Object.class ? super.shouldSerializeMember(definedIn, fieldName)
                                : false;
                    }
                };
            }

            // each unmarshal overload calls getFullQ(false) on a restored BiocacheQuery
            @Override
            public Object unmarshal(HierarchicalStreamReader reader) {
                Object o = super.unmarshal(reader);
                if (o instanceof BiocacheQuery)
                    ((BiocacheQuery) o).getFullQ(false);
                return o;
            }

            @Override
            public Object unmarshal(HierarchicalStreamReader reader, Object root) {
                Object o = super.unmarshal(reader, root);
                if (o instanceof BiocacheQuery)
                    ((BiocacheQuery) o).getFullQ(false);
                return o;
            }

            @Override
            public Object unmarshal(HierarchicalStreamReader reader, Object root, DataHolder dataHolder) {
                Object o = super.unmarshal(reader, root, dataHolder);
                if (o instanceof BiocacheQuery)
                    ((BiocacheQuery) o).getFullQ(false);
                return o;
            }
        };

        PersistenceStrategy strategy = new FilePersistenceStrategy(new File(sfld), xstream);

        List list = new XmlArrayList(strategy);

        // walk the persisted objects from last to first
        ListIterator it = list.listIterator(list.size());
        int scatterplotIndex = 0;
        while (it.hasPrevious()) {
            Object o = it.previous();
            MapLayer ml = null;
            if (o instanceof MapLayer) {
                ml = (MapLayer) o;
                LOGGER.debug("Loading " + ml.getName() + " -> " + ml.isDisplayed());
                addUserDefinedLayerToMenu(ml, false);
            } else if (o instanceof ScatterplotDataDTO) {
                ScatterplotDataDTO spdata = (ScatterplotDataDTO) o;
                loadScatterplot(spdata, "My Scatterplot " + scatterplotIndex++);

            }

            if (ml != null) {
                addUserDefinedLayerToMenu(ml, true);
            }
        }

    } catch (Exception e) {
        // best effort: keep a copy of the stack trace next to the session data
        PrintWriter pw = null;
        try {

            File f = new File("/data/sessions/" + sessionid + ".txt");

            pw = new PrintWriter(f);

            e.printStackTrace(pw);

        } catch (Exception ex) {
            LOGGER.warn("Unable to write stack trace file for session " + sessionid, ex);
        } finally {
            if (pw != null) {
                pw.close();
            }
        }
        LOGGER.error("Unable to load session data", e);
        showMessage("Unable to load session data");

    } finally {
        if (scanner != null) {
            scanner.close();
        }
        // best effort: the marker file is written on success and on failure alike
        try {

            File f = new File("/data/sessions/ok/" + sessionid + ".txt");

            FileUtils.writeStringToFile(f, "ok");

        } catch (Exception ex) {
            LOGGER.warn("Unable to write marker file for session " + sessionid, ex);
        }
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the pos-tag sequence filters applied after pos-tagging, building and caching them on
 * the first call.
 * <p>
 * Descriptors are read from the file at <code>posTagSequenceFilterPath</code> when it is set;
 * otherwise from the descriptors stored in the given model under
 * {@link PosTagFilterService#POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY}, if any. Every line read
 * is recorded in this configuration's descriptor map under the same key; only non-empty lines
 * that do not start with '#' are turned into filters.
 *
 * @param model the model whose stored descriptors are used when no filter file is configured;
 *              may be null
 * @return the cached list of pos-tag sequence filters
 * @throws RuntimeException wrapping any exception raised while building the list
 */
private List<PosTagSequenceFilter> getPosTagSequenceFilters(MachineLearningModel model) {
    try {
        if (posTaggerPostProcessingFilters == null) {
            List<String> posTaggerPostProcessingFilterDescriptors = new ArrayListNoNulls<String>();
            posTaggerPostProcessingFilters = new ArrayListNoNulls<PosTagSequenceFilter>();

            List<Scanner> scanners = new ArrayListNoNulls<Scanner>();
            try {
                if (posTagSequenceFilterPath != null) {
                    File filterFile = this.getFile(posTagSequenceFilterPath);
                    Scanner scanner = new Scanner(new BufferedReader(
                            new InputStreamReader(new FileInputStream(filterFile), this.getInputCharset())));
                    scanners.add(scanner);
                } else if (model != null) {
                    List<String> modelDescriptors = model.getDescriptors()
                            .get(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY);
                    if (modelDescriptors != null) {
                        // re-join the stored descriptors so they can be read line by line
                        StringBuilder modelDescriptorString = new StringBuilder();
                        for (String descriptor : modelDescriptors) {
                            modelDescriptorString.append(descriptor).append("\n");
                        }
                        Scanner scanner = new Scanner(modelDescriptorString.toString());
                        scanners.add(scanner);
                    }
                }

                for (Scanner scanner : scanners) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        LOG.debug(descriptor);
                        // every line is recorded, including comments and blank lines
                        posTaggerPostProcessingFilterDescriptors.add(descriptor);
                        if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                            PosTagSequenceFilter filter = this.getPosTagFilterService()
                                    .getPosTagSequenceFilter(descriptor);
                            posTaggerPostProcessingFilters.add(filter);
                        }
                    }
                }
            } finally {
                // release file handles even if a descriptor fails to parse
                for (Scanner scanner : scanners) {
                    scanner.close();
                }
            }

            this.getDescriptors().put(PosTagFilterService.POSTAG_POSTPROCESSING_FILTER_DESCRIPTOR_KEY,
                    posTaggerPostProcessingFilterDescriptors);

        }
        return posTaggerPostProcessingFilters;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * The rules to apply when running the pos-tagger, built and cached on the first call.
 * <p>
 * Two sources are read in order: the implementation's default rules (skipped when
 * <code>posTaggerRulesReplace</code> is set), then the file at <code>posTaggerRuleFilePath</code>
 * if one is configured. Empty lines are ignored; all other lines are passed as rule descriptors
 * to the pos-tagger feature service.
 *
 * @return the cached list of pos-tagger rules
 * @throws RuntimeException wrapping any exception raised while building the list, including
 *                          the {@link TalismaneException} thrown when the configured file
 *                          does not exist
 */
@Override
public List<PosTaggerRule> getPosTaggerRules() {
    try {
        if (posTaggerRules == null) {
            posTaggerRules = new ArrayListNoNulls<PosTaggerRule>();
            // pass 0: built-in default rules, pass 1: rules from the configured file
            for (int i = 0; i <= 1; i++) {
                Scanner rulesScanner = null;
                if (i == 0) {
                    if (posTaggerRulesReplace)
                        continue;
                    rulesScanner = this.implementation.getDefaultPosTaggerRulesScanner();
                } else {
                    if (posTaggerRuleFilePath != null && posTaggerRuleFilePath.length() > 0) {
                        File posTaggerRuleFile = this.getFile(posTaggerRuleFilePath);
                        if (!posTaggerRuleFile.exists()) {
                            throw new TalismaneException(
                                    "posTaggerRules: File " + posTaggerRuleFilePath + " does not exist");
                        }
                        rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                new FileInputStream(posTaggerRuleFile), this.getInputCharset().name())));
                    }
                }

                if (rulesScanner != null) {
                    List<String> ruleDescriptors = new ArrayListNoNulls<String>();
                    try {
                        while (rulesScanner.hasNextLine()) {
                            String ruleDescriptor = rulesScanner.nextLine();
                            if (ruleDescriptor.length() > 0) {
                                ruleDescriptors.add(ruleDescriptor);
                                LOG.trace(ruleDescriptor);
                            }
                        }
                    } finally {
                        // all lines are in memory now; release the underlying file handle
                        rulesScanner.close();
                    }
                    List<PosTaggerRule> rules = this.getPosTaggerFeatureService().getRules(ruleDescriptors);
                    posTaggerRules.addAll(rules);

                }
            }
        }
        return posTaggerRules;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * The rules to apply when running the parser.
 * @return/*from w  w w  . java  2s.co m*/
 */
@Override
public List<ParserRule> getParserRules() {
    try {
        if (parserRules == null) {
            parserRules = new ArrayListNoNulls<ParserRule>();
            if (parserRuleFilePath != null && parserRuleFilePath.equalsIgnoreCase("null")) {
                // add no rules! (not even built-in ones)
            } else {
                for (int i = 0; i <= 1; i++) {
                    Scanner rulesScanner = null;
                    if (i == 0) {
                        if (parserRulesReplace)
                            continue;
                        rulesScanner = this.implementation.getDefaultParserRulesScanner();
                    } else {
                        if (parserRuleFilePath != null && parserRuleFilePath.length() > 0) {
                            File parserRuleFile = this.getFile(parserRuleFilePath);
                            if (!parserRuleFile.exists()) {
                                throw new TalismaneException(
                                        "parserRules: File " + parserRuleFilePath + " does not exist");
                            }
                            rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                    new FileInputStream(parserRuleFile), this.getInputCharset().name())));
                        }
                    }

                    if (rulesScanner != null) {
                        List<String> ruleDescriptors = new ArrayListNoNulls<String>();
                        while (rulesScanner.hasNextLine()) {
                            String ruleDescriptor = rulesScanner.nextLine();
                            if (ruleDescriptor.length() > 0) {
                                ruleDescriptors.add(ruleDescriptor);
                                LOG.trace(ruleDescriptor);
                            }
                        }
                        List<ParserRule> rules = this.getParserFeatureService().getRules(ruleDescriptors,
                                dynamiseFeatures);
                        parserRules.addAll(rules);

                    }
                }
            }
        }
        return parserRules;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * TokenFilters to be applied during analysis, built and cached on the first call.
 * <p>
 * Descriptors are read first from each file listed in <code>tokenSequenceFilterPath</code>
 * (semicolon-separated, empty entries skipped). Then, unless
 * <code>tokenSequenceFiltersReplace</code> is set, they are read from the descriptors stored in
 * the given model under {@link PosTagFilterService#POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY},
 * or from the implementation's default filters when no model is given. Every line read is
 * recorded in this configuration's descriptor map under the same key; only non-empty lines that
 * do not start with '#' are turned into filters.
 *
 * @param model the model whose stored descriptors are used in place of the defaults; may be null
 * @return the cached list of token sequence filters
 * @throws RuntimeException wrapping any exception raised while building the list, including
 *                          the {@link TalismaneException} thrown when a listed file does not exist
 */
private List<TokenSequenceFilter> getTokenSequenceFilters(MachineLearningModel model) {
    try {
        if (tokenSequenceFilters == null) {
            List<String> tokenSequenceFilterDescriptors = new ArrayListNoNulls<String>();
            tokenSequenceFilters = new ArrayListNoNulls<TokenSequenceFilter>();

            LOG.debug("Token sequence filters");

            List<Scanner> scanners = new ArrayListNoNulls<Scanner>();
            try {
                if (tokenSequenceFilterPath != null && tokenSequenceFilterPath.length() > 0) {
                    LOG.debug("tokenSequenceFilterPath: " + tokenSequenceFilterPath);
                    String[] parts = tokenSequenceFilterPath.split(";");
                    for (String part : parts) {
                        if (part.length() > 0) {
                            LOG.debug("From: " + part);
                            File tokenSequenceFilterFile = this.getFile(part);
                            if (!tokenSequenceFilterFile.exists()) {
                                throw new TalismaneException(
                                        "tokenSequenceFilters: File " + part + " does not exist");
                            }
                            Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                                    new FileInputStream(tokenSequenceFilterFile), this.getInputCharset())));
                            scanners.add(scanner);
                        }
                    }
                }
                if (!tokenSequenceFiltersReplace) {
                    if (model != null) {
                        LOG.debug("From model");
                        List<String> modelDescriptors = model.getDescriptors()
                                .get(PosTagFilterService.POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY);
                        // re-join the stored descriptors so they can be read line by line;
                        // an empty scanner is still added when the model stores none
                        StringBuilder modelDescriptorString = new StringBuilder();
                        if (modelDescriptors != null) {
                            for (String descriptor : modelDescriptors) {
                                modelDescriptorString.append(descriptor).append("\n");
                            }
                        }
                        Scanner scanner = new Scanner(modelDescriptorString.toString());
                        scanners.add(scanner);
                    } else {
                        // default token filters
                        LOG.debug("From default");
                        Scanner scanner = this.implementation.getDefaultTokenSequenceFiltersScanner();
                        scanners.add(scanner);
                    }
                }

                for (Scanner scanner : scanners) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        LOG.debug(descriptor);
                        // every line is recorded, including comments and blank lines
                        tokenSequenceFilterDescriptors.add(descriptor);
                        if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                            TokenSequenceFilter tokenSequenceFilter = this.getTokenFilterService()
                                    .getTokenSequenceFilter(descriptor);
                            if (tokenSequenceFilter instanceof NeedsTalismaneSession)
                                ((NeedsTalismaneSession) tokenSequenceFilter).setTalismaneSession(talismaneSession);
                            tokenSequenceFilters.add(tokenSequenceFilter);
                        }
                    }
                }
            } finally {
                // release file handles even if a later file is missing or a descriptor fails to parse
                for (Scanner scanner : scanners) {
                    scanner.close();
                }
            }

            this.getDescriptors().put(PosTagFilterService.POSTAG_PREPROCESSING_FILTER_DESCRIPTOR_KEY,
                    tokenSequenceFilterDescriptors);
        }
        return tokenSequenceFilters;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}