Example usage for java.util.Scanner.hasNextLine()

List of usage examples for java.util Scanner hasNextLine

Introduction

This page collects real-world usage examples of the java.util.Scanner.hasNextLine() method.

Prototype

public boolean hasNextLine() 

Source Link

Document

Returns true if there is another line in the input of this scanner.

Usage

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * Returns the pos-tagger features, loading them on first access.
 * <p>
 * If no feature set has been cached yet and {@code posTaggerFeaturePath} is set,
 * every line of that file is read (using the input charset) as a feature
 * descriptor, the feature set is built from those descriptors, and the
 * descriptors are recorded under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set, or {@code null} if none was cached and no
 *         feature path is configured
 * @throws RuntimeException wrapping any exception raised while reading the file
 *         or building the features
 */
public Set<PosTaggerFeature<?>> getPosTaggerFeatures() {
    if (posTaggerFeatures == null) {
        try {
            if (posTaggerFeaturePath != null) {
                LOG.debug("Found setting to change pos-tagger features");
                File posTaggerFeatureFile = new File(posTaggerFeaturePath);
                List<String> featureDescriptors = new ArrayList<String>();
                Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(posTaggerFeatureFile), this.getInputCharset())));
                try {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                } finally {
                    // Closing the scanner also closes the underlying file stream,
                    // even if reading fails part-way through.
                    scanner.close();
                }
                posTaggerFeatures = this.getPosTaggerFeatureService().getFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return posTaggerFeatures;
}

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * Returns the token pattern match features, loading them on first access.
 * <p>
 * If no feature set has been cached yet and {@code tokeniserFeaturePath} is set,
 * every line of that file is read (using the input charset) as a feature
 * descriptor, the feature set is built from those descriptors, and the
 * descriptors are recorded under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set, or {@code null} if none was cached and no
 *         feature path is configured
 * @throws RuntimeException wrapping any exception raised while reading the file
 *         or building the features
 */
public Set<TokenPatternMatchFeature<?>> getTokenPatternMatchFeatures() {
    if (tokenPatternMatchFeatures == null) {
        try {
            if (tokeniserFeaturePath != null) {
                LOG.debug("Found setting to change token pattern match features");
                File tokeniserFeatureFile = new File(tokeniserFeaturePath);
                List<String> featureDescriptors = new ArrayList<String>();
                Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                try {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                } finally {
                    // Closing the scanner also closes the underlying file stream,
                    // even if reading fails part-way through.
                    scanner.close();
                }
                tokenPatternMatchFeatures = this.getTokenFeatureService()
                        .getTokenPatternMatchFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokenPatternMatchFeatures;
}

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * Returns the tokeniser context features, loading them on first access.
 * <p>
 * If no feature set has been cached yet and {@code tokeniserFeaturePath} is set,
 * every line of that file is read (using the input charset) as a feature
 * descriptor. The feature set is then built from those descriptors together
 * with the parsed test patterns of the tokeniser pattern manager, and the
 * descriptors are recorded under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set, or {@code null} if none was cached and no
 *         feature path is configured
 * @throws RuntimeException wrapping any exception raised while reading the file
 *         or building the features
 */
public Set<TokeniserContextFeature<?>> getTokeniserContextFeatures() {
    if (tokeniserContextFeatures == null) {
        try {
            if (tokeniserFeaturePath != null) {
                // Resolved before the file is opened, exactly as before.
                TokeniserPatternManager tokeniserPatternManager = this.getTokeniserPatternManager();
                LOG.debug("Found setting to change tokeniser context features");
                File tokeniserFeatureFile = new File(tokeniserFeaturePath);
                List<String> featureDescriptors = new ArrayList<String>();
                Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                try {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                } finally {
                    // Closing the scanner also closes the underlying file stream,
                    // even if reading fails part-way through.
                    scanner.close();
                }
                tokeniserContextFeatures = this.getTokenFeatureService().getTokeniserContextFeatureSet(
                        featureDescriptors, tokeniserPatternManager.getParsedTestPatterns());
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokeniserContextFeatures;
}

From source file:io.realm.RealmTests.java

/**
 * Reads {@code assets/unicode_codepoints.csv} from the classpath and converts
 * each line, parsed as a hexadecimal code point, into a string holding that
 * single code point.
 * <p>
 * Any exception raised while reading or parsing fails the test with the index
 * and raw text of the offending line.
 *
 * @return the strings in file order
 */
private List<String> getCharacterArray() {
    List<String> chars_array = new ArrayList<String>();
    String file = "assets/unicode_codepoints.csv";
    Scanner scanner = new Scanner(getClass().getClassLoader().getResourceAsStream(file), "UTF-8");
    int i = 0;
    String currentUnicode = null;
    try {
        while (scanner.hasNextLine()) {
            currentUnicode = scanner.nextLine();
            char[] chars = Character.toChars(Integer.parseInt(currentUnicode, 16));
            String codePoint = new String(chars);
            chars_array.add(codePoint);
            i++;
        }
    } catch (Exception e) {
        fail("Failure, Codepoint: " + i + " / " + currentUnicode + " " + e.getMessage());
    } finally {
        // Release the resource stream whether or not parsing succeeded.
        scanner.close();
    }
    return chars_array;
}

From source file:io.realm.RealmTests.java

/**
 * Stores every code point listed in {@code assets/unicode_codepoints.csv} as a
 * string column value and checks that it reads back unchanged.
 * <p>
 * Each line of the file is parsed as a hexadecimal code point. One
 * {@code AllTypes} object is created per line, keyed by the line index in
 * {@code columnLong}.
 */
@Test
public void utf8Tests() {
    // Start from an empty AllTypes table.
    realm.beginTransaction();
    realm.delete(AllTypes.class);
    realm.commitTransaction();

    String file = "assets/unicode_codepoints.csv";
    Scanner scanner = new Scanner(getClass().getClassLoader().getResourceAsStream(file), "UTF-8");
    int i = 0;
    String currentUnicode = null;
    try {
        realm.beginTransaction();
        while (scanner.hasNextLine()) {
            currentUnicode = scanner.nextLine();
            char[] chars = Character.toChars(Integer.parseInt(currentUnicode, 16));
            String codePoint = new String(chars);
            AllTypes o = realm.createObject(AllTypes.class);
            o.setColumnLong(i);
            o.setColumnString(codePoint);

            AllTypes realmType = realm.where(AllTypes.class).equalTo("columnLong", i).findFirst();
            // Note: this condition skips the entries at index 0 and 1.
            if (i > 1) {
                assertEquals("Codepoint: " + i + " / " + currentUnicode, codePoint,
                        realmType.getColumnString()); // codepoint 0 is NULL, ignore for now.
            }
            i++;
        }
        realm.commitTransaction();
    } catch (Exception e) {
        fail("Failure, Codepoint: " + i + " / " + currentUnicode + " " + e.getMessage());
    } finally {
        // Release the resource stream even when an assertion fails mid-loop.
        scanner.close();
    }
}

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * Configures this object from a map of command-line style arguments.
 * <p>
 * When {@code args} is empty, usage instructions are printed to standard
 * output and nothing else happens. Otherwise each entry is matched against the
 * known argument names and stored in the corresponding field. After that,
 * defaults are resolved (start/end module, beam widths, charsets, file name)
 * and any configured resources are loaded: pos-tag set, transition system with
 * dependency labels, lexicons and external resources.
 * <p>
 * Note that the {@code logConfigFile} and {@code performanceConfigFile}
 * entries are removed from the supplied map.
 *
 * @param args argument name to argument value
 * @throws TalismaneException if a module, built-in template or transition
 *         system is unknown, if {@code encoding} is combined with
 *         {@code inputEncoding} or {@code outputEncoding}, or if no command is
 *         provided
 * @throws RuntimeException if an argument name is unknown, or if the command
 *         is {@code evaluate} and no {@code outDir} was given
 * @throws Exception for any other failure while reading configuration files
 */
void loadParameters(Map<String, String> args) throws Exception {
    if (args.size() == 0) {
        System.out.println("Talismane usage instructions: ");
        System.out.println("* indicates optional, + indicates default value");
        System.out.println("");
        System.out.println(
                "Usage: command=analyse *startModule=[sentence+|tokenise|postag|parse] *endModule=[sentence|tokenise|postag|parse+] *inFile=[inFilePath, stdin if missing] *outFile=[outFilePath, stdout if missing] *template=[outputTemplatePath]");
        System.out.println("");
        System.out.println("Additional optional parameters:");
        System.out.println(
                " *encoding=[UTF-8, ...] *includeDetails=[true|false+] posTaggerRules*=[posTaggerRuleFilePath] textFilters*=[regexFilterFilePath] *sentenceModel=[path] *tokeniserModel=[path] *posTaggerModel=[path] *parserModel=[path] *inputPatternFile=[inputPatternFilePath] *posTagSet=[posTagSetPath]");
        return;
    }

    // The two config-file arguments are consumed here and removed from the map
    // so that the main loop below does not reject them as unknown.
    String logConfigPath = args.get("logConfigFile");
    if (logConfigPath != null) {
        args.remove("logConfigFile");
        Properties props = new Properties();
        // Properties.load leaves the stream open, so close it explicitly.
        FileInputStream logConfigStream = new FileInputStream(logConfigPath);
        try {
            props.load(logConfigStream);
        } finally {
            logConfigStream.close();
        }
        PropertyConfigurator.configure(props);
    }

    String performanceConifPath = args.get("performanceConfigFile");
    if (performanceConifPath != null) {
        args.remove("performanceConfigFile");
        performanceConfigFile = new File(performanceConifPath);
    }

    String encoding = null;
    String inputEncoding = null;
    String outputEncoding = null;
    String builtInTemplate = null;

    String posTagSetPath = null;
    String externalResourcePath = null;
    String transitionSystemStr = null;

    for (Entry<String, String> arg : args.entrySet()) {
        String argName = arg.getKey();
        String argValue = arg.getValue();
        if (argName.equals("command")) {
            String commandString = argValue;
            // Accept the American spelling as an alias.
            if (commandString.equals("analyze"))
                commandString = "analyse";

            command = Command.valueOf(commandString);
        } else if (argName.equals("option")) {
            option = Option.valueOf(argValue);
        } else if (argName.equals("mode")) {
            mode = Mode.valueOf(argValue);
        } else if (argName.equals("module")) {
            if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                module = Talismane.Module.SentenceDetector;
            else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                module = Talismane.Module.Tokeniser;
            else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                module = Talismane.Module.PosTagger;
            else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                module = Talismane.Module.Parser;
            else
                throw new TalismaneException("Unknown module: " + argValue);
        } else if (argName.equals("startModule")) {
            if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                startModule = Talismane.Module.SentenceDetector;
            else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                startModule = Talismane.Module.Tokeniser;
            else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                startModule = Talismane.Module.PosTagger;
            else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                startModule = Talismane.Module.Parser;
            else
                throw new TalismaneException("Unknown startModule: " + argValue);
        } else if (argName.equals("endModule")) {
            if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector"))
                endModule = Talismane.Module.SentenceDetector;
            else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser"))
                endModule = Talismane.Module.Tokeniser;
            else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger"))
                endModule = Talismane.Module.PosTagger;
            else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser"))
                endModule = Talismane.Module.Parser;
            else
                throw new TalismaneException("Unknown endModule: " + argValue);
        } else if (argName.equals("inFile"))
            inFilePath = argValue;
        else if (argName.equals("outFile"))
            outFilePath = argValue;
        else if (argName.equals("outDir"))
            outDirPath = argValue;
        else if (argName.equals("template"))
            templatePath = argValue;
        else if (argName.equals("builtInTemplate"))
            builtInTemplate = argValue;
        else if (argName.equals("encoding")) {
            if (inputEncoding != null || outputEncoding != null)
                throw new TalismaneException(
                        "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
            encoding = argValue;
        } else if (argName.equals("inputEncoding")) {
            if (encoding != null)
                throw new TalismaneException(
                        "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
            inputEncoding = argValue;
        } else if (argName.equals("outputEncoding")) {
            if (encoding != null)
                throw new TalismaneException(
                        "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'");
            outputEncoding = argValue;
        } else if (argName.equals("includeDetails"))
            includeDetails = argValue.equalsIgnoreCase("true");
        else if (argName.equals("propagateBeam"))
            propagateBeam = argValue.equalsIgnoreCase("true");
        else if (argName.equals("beamWidth"))
            beamWidth = Integer.parseInt(argValue);
        else if (argName.equals("sentenceModel"))
            sentenceModelFilePath = argValue;
        else if (argName.equals("tokeniserModel"))
            tokeniserModelFilePath = argValue;
        else if (argName.equals("posTaggerModel"))
            posTaggerModelFilePath = argValue;
        else if (argName.equals("parserModel"))
            parserModelFilePath = argValue;
        else if (argName.equals("inputPatternFile"))
            inputPatternFilePath = argValue;
        else if (argName.equals("inputPattern"))
            inputRegex = argValue;
        else if (argName.equals("evaluationPatternFile"))
            evaluationPatternFilePath = argValue;
        else if (argName.equals("evaluationPattern"))
            evaluationRegex = argValue;
        else if (argName.equals("posTaggerRules")) {
            // A "replace:" prefix means the given rules replace the defaults.
            if (argValue.startsWith("replace:")) {
                posTaggerRulesReplace = true;
                posTaggerRuleFilePath = argValue.substring("replace:".length());
            } else {
                posTaggerRuleFilePath = argValue;
            }
        } else if (argName.equals("parserRules")) {
            if (argValue.startsWith("replace:")) {
                parserRulesReplace = true;
                parserRuleFilePath = argValue.substring("replace:".length());
            } else {
                parserRuleFilePath = argValue;
            }
        } else if (argName.equals("posTagSet"))
            posTagSetPath = argValue;
        else if (argName.equals("textFilters"))
            textFiltersPath = argValue;
        else if (argName.equals("tokenFilters"))
            tokenFiltersPath = argValue;
        else if (argName.equals("tokenSequenceFilters"))
            tokenSequenceFilterPath = argValue;
        else if (argName.equals("posTagSequenceFilters"))
            posTagSequenceFilterPath = argValue;
        else if (argName.equals("logStats"))
            logStats = argValue.equalsIgnoreCase("true");
        else if (argName.equals("newline"))
            newlineMarker = MarkerFilterType.valueOf(argValue);
        else if (argName.equals("fileName"))
            fileName = argValue;
        else if (argName.equals("processByDefault"))
            processByDefault = argValue.equalsIgnoreCase("true");
        else if (argName.equals("maxParseAnalysisTime"))
            maxParseAnalysisTime = Integer.parseInt(argValue);
        else if (argName.equals("minFreeMemory"))
            minFreeMemory = Integer.parseInt(argValue);
        else if (argName.equals("transitionSystem"))
            transitionSystemStr = argValue;
        else if (argName.equals("sentenceCount"))
            maxSentenceCount = Integer.parseInt(argValue);
        else if (argName.equals("endBlockCharCode"))
            endBlockCharacter = (char) Integer.parseInt(argValue);
        else if (argName.equals("outputGuesses"))
            outputGuesses = argValue.equalsIgnoreCase("true");
        else if (argName.equals("outputGuessCount"))
            outputGuessCount = Integer.parseInt(argValue);
        else if (argName.equals("suffix"))
            suffix = argValue;
        else if (argName.equals("includeDistanceFScores"))
            includeDistanceFScores = argValue.equalsIgnoreCase("true");
        else if (argName.equals("evaluationFile"))
            evaluationFilePath = argValue;
        else if (argName.equals("labeledEvaluation"))
            labeledEvaluation = argValue.equalsIgnoreCase("true");
        else if (argName.equals("tokeniserBeamWidth"))
            tokeniserBeamWidth = Integer.parseInt(argValue);
        else if (argName.equals("posTaggerBeamWidth"))
            posTaggerBeamWidth = Integer.parseInt(argValue);
        else if (argName.equals("parserBeamWidth"))
            parserBeamWidth = Integer.parseInt(argValue);
        else if (argName.equals("propagateTokeniserBeam"))
            propagateTokeniserBeam = argValue.equalsIgnoreCase("true");
        else if (argName.equals("blockSize"))
            blockSize = Integer.parseInt(argValue);
        else if (argName.equals("crossValidationSize"))
            crossValidationSize = Integer.parseInt(argValue);
        else if (argName.equals("includeIndex"))
            includeIndex = Integer.parseInt(argValue);
        else if (argName.equals("excludeIndex"))
            excludeIndex = Integer.parseInt(argValue);
        else if (argName.equals("dynamiseFeatures"))
            dynamiseFeatures = argValue.equalsIgnoreCase("true");
        else if (argName.equals("predictTransitions"))
            predictTransitions = argValue.equalsIgnoreCase("true");
        else if (argName.equals("lexiconDir")) {
            if (argValue.startsWith("replace:")) {
                replaceLexicon = true;
                lexiconDirPath = argValue.substring("replace:".length());
            } else {
                lexiconDirPath = argValue;
            }
        } else if (argName.equals("perceptronScoring")) {
            PerceptronScoring perceptronScoring = PerceptronScoring.valueOf(argValue);
            MachineLearningSession.setPerceptronScoring(perceptronScoring);
        } else if (argName.equals("parseComparisonStrategy")) {
            parseComparisonStrategyType = ParseComparisonStrategyType.valueOf(argValue);
        } else if (argName.equals("sentenceReader")) {
            sentenceReaderPath = argValue;
        } else if (argName.equals("skipLabel")) {
            skipLabel = argValue;
        } else if (argName.equals("earlyStop")) {
            earlyStop = argValue.equalsIgnoreCase("true");
        } else if (argName.equals("sentenceFeatures")) {
            sentenceFeaturePath = argValue;
        } else if (argName.equals("tokeniserFeatures")) {
            tokeniserFeaturePath = argValue;
        } else if (argName.equals("tokeniserPatterns")) {
            tokeniserPatternFilePath = argValue;
        } else if (argName.equals("posTaggerFeatures")) {
            posTaggerFeaturePath = argValue;
        } else if (argName.equals("parserFeatures")) {
            parserFeaturePath = argValue;
        } else if (argName.equals("externalResources")) {
            externalResourcePath = argValue;
        } else if (argName.equals("testWords")) {
            String[] parts = argValue.split(";");
            testWords = new HashSet<String>();
            for (String part : parts)
                testWords.add(part);
        } else if (argName.equals("includeLexiconCoverage")) {
            includeLexiconCoverage = argValue.equalsIgnoreCase("true");
        } else if (argName.equals("iterations"))
            iterations = Integer.parseInt(argValue);
        else if (argName.equals("cutoff"))
            cutoff = Integer.parseInt(argValue);
        else if (argName.equals("dependencyLabels"))
            dependencyLabelPath = argValue;
        else if (argName.equals("parsingConstrainer"))
            parsingConstrainerPath = argValue;
        else if (argName.equals("algorithm"))
            algorithm = MachineLearningAlgorithm.valueOf(argValue);
        else if (argName.equals("linearSVMSolver"))
            solverType = LinearSVMSolverType.valueOf(argValue);
        else if (argName.equals("linearSVMCost"))
            constraintViolationCost = Double.parseDouble(argValue);
        else if (argName.equals("linearSVMEpsilon"))
            epsilon = Double.parseDouble(argValue);
        else if (argName.equals("perceptronTolerance"))
            perceptronTolerance = Double.parseDouble(argValue);
        else if (argName.equals("averageAtIntervals"))
            averageAtIntervals = argValue.equalsIgnoreCase("true");
        else if (argName.equals("perceptronObservationPoints")) {
            String[] points = argValue.split(",");
            perceptronObservationPoints = new ArrayList<Integer>();
            for (String point : points)
                perceptronObservationPoints.add(Integer.parseInt(point));
        } else if (argName.equals("patternTokeniser"))
            patternTokeniserType = PatternTokeniserType.valueOf(argValue);
        else if (argName.equals("excludeFile")) {
            excludeFileName = argValue;
        } else {
            System.out.println("Unknown argument: " + argName);
            throw new RuntimeException("Unknown argument: " + argName);
        }
    }

    if (command == null)
        throw new TalismaneException("No command provided.");

    if (command.equals(Command.evaluate)) {
        // Guard against null as well, so a missing outDir gives the intended
        // error message rather than a NullPointerException.
        if (outDirPath == null || outDirPath.length() == 0)
            throw new RuntimeException("Missing argument: outdir");
    }

    // Start/end module fall back to "module" and then to the full pipeline;
    // "module" itself falls back to the end module.
    if (startModule == null)
        startModule = module;
    if (startModule == null)
        startModule = Module.SentenceDetector;
    if (endModule == null)
        endModule = module;
    if (endModule == null)
        endModule = Module.Parser;
    if (module == null)
        module = endModule;

    if (command == Command.train) {
        this.predictTransitions = true;
    }

    if (builtInTemplate != null) {
        if (builtInTemplate.equalsIgnoreCase("with_location")) {
            tokeniserTemplateName = "tokeniser_template_with_location.ftl";
            posTaggerTemplateName = "posTagger_template_with_location.ftl";
            parserTemplateName = "parser_conll_template_with_location.ftl";
        } else if (builtInTemplate.equalsIgnoreCase("with_prob")) {
            tokeniserTemplateName = "tokeniser_template_with_prob.ftl";
            posTaggerTemplateName = "posTagger_template_with_prob.ftl";
            parserTemplateName = "parser_conll_template_with_prob.ftl";
        } else if (builtInTemplate.equalsIgnoreCase("with_comments")) {
            posTaggerTemplateName = "posTagger_template_with_comments.ftl";
            parserTemplateName = "parser_conll_template_with_comments.ftl";
        } else {
            throw new TalismaneException("Unknown builtInTemplate: " + builtInTemplate);
        }
    }

    // A negative module-specific beam width means "use the general beam width".
    if (posTaggerBeamWidth < 0)
        posTaggerBeamWidth = beamWidth;
    if (parserBeamWidth < 0)
        parserBeamWidth = beamWidth;

    inputCharset = Charset.defaultCharset();
    outputCharset = Charset.defaultCharset();
    if (encoding != null) {
        inputCharset = Charset.forName(encoding);
        outputCharset = Charset.forName(encoding);
    } else {
        if (inputEncoding != null)
            inputCharset = Charset.forName(inputEncoding);
        if (outputEncoding != null)
            outputCharset = Charset.forName(outputEncoding);
    }

    if (fileName == null && inFilePath != null) {
        fileName = inFilePath;
    }

    if (posTagSetPath != null) {
        File posTagSetFile = new File(posTagSetPath);
        // NOTE(review): this scanner is handed to the pos-tagger service and is
        // not closed here; confirm whether the service closes it.
        Scanner posTagSetScanner = new Scanner(new BufferedReader(
                new InputStreamReader(new FileInputStream(posTagSetFile), this.getInputCharset().name())));

        PosTagSet posTagSet = this.getPosTaggerService().getPosTagSet(posTagSetScanner);
        TalismaneSession.setPosTagSet(posTagSet);
    }

    if (transitionSystemStr != null) {
        TransitionSystem transitionSystem = null;
        if (transitionSystemStr.equalsIgnoreCase("ShiftReduce")) {
            transitionSystem = this.getParserService().getShiftReduceTransitionSystem();
        } else if (transitionSystemStr.equalsIgnoreCase("ArcEager")) {
            transitionSystem = this.getParserService().getArcEagerTransitionSystem();
        } else {
            throw new TalismaneException("Unknown transition system: " + transitionSystemStr);
        }

        if (dependencyLabelPath != null) {
            File dependencyLabelFile = new File(dependencyLabelPath);
            List<String> dependencyLabels = new ArrayList<String>();
            Scanner depLabelScanner = new Scanner(new BufferedReader(
                    new InputStreamReader(new FileInputStream(dependencyLabelFile), "UTF-8")));
            try {
                while (depLabelScanner.hasNextLine()) {
                    String dependencyLabel = depLabelScanner.nextLine();
                    // Lines starting with '#' are treated as comments.
                    if (!dependencyLabel.startsWith("#"))
                        dependencyLabels.add(dependencyLabel);
                }
            } finally {
                // Closing the scanner also closes the underlying file stream.
                depLabelScanner.close();
            }
            transitionSystem.setDependencyLabels(dependencyLabels);
        }

        TalismaneSession.setTransitionSystem(transitionSystem);
    }

    if (this.lexiconDirPath != null) {
        PosTaggerLexicon lexicon = null;
        LexiconChain lexiconChain = null;

        if (replaceLexicon) {
            lexiconChain = new LexiconChain();
        } else {
            // Keep the default lexicon at the head of the chain.
            lexicon = this.implementation.getDefaultLexicon();
            if (lexicon instanceof LexiconChain) {
                lexiconChain = (LexiconChain) lexicon;
            } else {
                lexiconChain = new LexiconChain();
                lexiconChain.addLexicon(lexicon);
            }
        }
        File lexiconDir = new File(lexiconDirPath);
        LexiconDeserializer lexiconDeserializer = new LexiconDeserializer();
        List<PosTaggerLexicon> lexicons = lexiconDeserializer.deserializeLexicons(lexiconDir);
        for (PosTaggerLexicon oneLexicon : lexicons) {
            lexiconChain.addLexicon(oneLexicon);
        }

        lexicon = lexiconChain;

        TalismaneSession.setLexicon(lexicon);
    }

    if (externalResourcePath != null) {
        externalResourceFinder = this.getMachineLearningService().getExternalResourceFinder();
        File externalResourceFile = new File(externalResourcePath);
        externalResourceFinder.addExternalResources(externalResourceFile);

        // Share every loaded resource with each module's own resource finder.
        ExternalResourceFinder parserResourceFinder = this.getParserFeatureService()
                .getExternalResourceFinder();
        ExternalResourceFinder posTaggerResourceFinder = this.getPosTaggerFeatureService()
                .getExternalResourceFinder();
        ExternalResourceFinder tokeniserResourceFinder = this.getTokenFeatureService()
                .getExternalResourceFinder();
        ExternalResourceFinder sentenceResourceFinder = this.getSentenceDetectorFeatureService()
                .getExternalResourceFinder();
        for (ExternalResource<?> externalResource : externalResourceFinder.getExternalResources()) {
            parserResourceFinder.addExternalResource(externalResource);
            posTaggerResourceFinder.addExternalResource(externalResource);
            tokeniserResourceFinder.addExternalResource(externalResource);
            sentenceResourceFinder.addExternalResource(externalResource);
        }
    }
}

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * TokenFilters to be applied during analysis.
 * @return/*  w w w.j  a  v  a  2s.  co m*/
 */
public List<TokenFilter> getTokenFilters() {
    try {
        if (tokenFilters == null) {
            List<String> tokenFilterDescriptors = new ArrayList<String>();
            tokenFilters = new ArrayList<TokenFilter>();
            for (int i = 0; i <= 1; i++) {
                LOG.debug("Token filters");
                Scanner tokenFilterScanner = null;
                if (i == 0) {
                    if (tokenFiltersPath != null && tokenFiltersPath.length() > 0) {
                        LOG.debug("From: " + tokenFiltersPath);
                        File tokenFilterFile = new File(tokenFiltersPath);
                        tokenFilterScanner = new Scanner(tokenFilterFile);
                    }
                } else {
                    LOG.debug("From default");
                    tokenFilterScanner = this.implementation.getDefaultTokenFiltersScanner();
                }
                if (tokenFilterScanner != null) {
                    while (tokenFilterScanner.hasNextLine()) {
                        String descriptor = tokenFilterScanner.nextLine();
                        LOG.debug(descriptor);
                        tokenFilterDescriptors.add(descriptor);
                        if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                            TokenFilter tokenFilter = this.getTokenFilterService().getTokenFilter(descriptor);
                            tokenFilters.add(tokenFilter);
                        }
                    }
                }
            }
            this.getDescriptors().put(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY, tokenFilterDescriptors);

        }
        return tokenFilters;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the tokeniser pattern manager, building it on first access.
 * <p>
 * Every line of the file at {@code tokeniserPatternFilePath} is read (using
 * the input charset) as a pattern descriptor. The descriptors are recorded
 * under {@code TokeniserPatternService.PATTERN_DESCRIPTOR_KEY} and used to
 * build the pattern manager.
 *
 * @return the cached or newly built pattern manager
 * @throws RuntimeException if no tokeniser pattern file is configured, or
 *         wrapping any exception raised while reading the file or building
 *         the manager
 */
@Override
public TokeniserPatternManager getTokeniserPatternManager() {
    if (tokeniserPatternManager == null) {
        // Treat an unset (null) path the same as an empty one instead of
        // failing with a NullPointerException.
        if (tokeniserPatternFilePath == null || tokeniserPatternFilePath.length() == 0)
            throw new RuntimeException("Missing argument: tokeniserPatterns");
        try {
            File tokeniserPatternFile = this.getFile(tokeniserPatternFilePath);
            List<String> patternDescriptors = new ArrayListNoNulls<String>();
            Scanner scanner = new Scanner(new BufferedReader(
                    new InputStreamReader(new FileInputStream(tokeniserPatternFile), this.getInputCharset())));
            try {
                while (scanner.hasNextLine()) {
                    String descriptor = scanner.nextLine();
                    patternDescriptors.add(descriptor);
                    LOG.debug(descriptor);
                }
            } finally {
                // Close on every path, not only when reading succeeds.
                scanner.close();
            }

            this.getDescriptors().put(TokeniserPatternService.PATTERN_DESCRIPTOR_KEY, patternDescriptors);

            tokeniserPatternManager = this.getTokeniserPatternService().getPatternManager(patternDescriptors);
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokeniserPatternManager;
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the parser features, loading them on first access.
 * <p>
 * If no feature set has been cached yet and {@code parserFeaturePath} is set,
 * every line of that file is read (using the input charset) as a feature
 * descriptor, the features are built from those descriptors, and the
 * descriptors are recorded under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set, or {@code null} if none was cached and no
 *         feature path is configured
 * @throws RuntimeException wrapping any exception raised while reading the file
 *         or building the features
 */
@Override
public Set<ParseConfigurationFeature<?>> getParserFeatures() {
    if (parserFeatures == null) {
        try {
            if (parserFeaturePath != null) {
                LOG.debug("Found setting to change parser features");
                File parserFeatureFile = this.getFile(parserFeaturePath);
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                Scanner scanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(parserFeatureFile), this.getInputCharset())));
                try {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                } finally {
                    // Closing the scanner also closes the underlying file stream,
                    // even if reading fails part-way through.
                    scanner.close();
                }
                parserFeatures = this.getParserFeatureService().getFeatures(featureDescriptors);

                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return parserFeatures;
}