Example usage for java.util Scanner hasNextLine

List of usage examples for java.util Scanner hasNextLine

Introduction

This page collects usage examples for the hasNextLine() method of java.util.Scanner.

Prototype

public boolean hasNextLine() 

Source Link

Document

Returns true if there is another line in the input of this scanner. This method may block while waiting for input, and the scanner does not advance past any input.

Usage

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the language detector features, loading them on first access.
 * <p>
 * When {@code languageFeaturePath} is set and no features are cached yet, every line of
 * the referenced file is read as a feature descriptor (decoded with
 * {@code getInputCharset()}), the descriptors are turned into a feature set by the
 * language detector service, and they are also stored in the descriptor map under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set; {@code null} if nothing is cached and no feature path is set
 * @throws RuntimeException wrapping any exception raised while reading or parsing the file
 */
@Override
public Set<LanguageDetectorFeature<?>> getLanguageDetectorFeatures() {
    if (languageFeatures == null) {
        try {
            if (languageFeaturePath != null) {
                LOG.debug("Found setting to change language detector features");
                File languageFeatureFile = this.getFile(languageFeaturePath);
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                // try-with-resources closes the scanner, and with it the underlying file
                // stream, even when reading fails part-way through.
                try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(languageFeatureFile), this.getInputCharset())))) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                    // Scanner swallows read errors and reports end-of-input instead;
                    // surface them rather than silently using a truncated descriptor list.
                    if (scanner.ioException() != null) {
                        throw scanner.ioException();
                    }
                }
                languageFeatures = this.getLanguageDetectorService().getFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return languageFeatures;
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the sentence detector features, loading them on first access.
 * <p>
 * When {@code sentenceFeaturePath} is set and no features are cached yet, every line of
 * the referenced file is read as a feature descriptor (decoded with
 * {@code getInputCharset()}), the descriptors are turned into a feature set by the
 * sentence detector feature service, and they are also stored in the descriptor map under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set; {@code null} if nothing is cached and no feature path is set
 * @throws RuntimeException wrapping any exception raised while reading or parsing the file
 */
@Override
public Set<SentenceDetectorFeature<?>> getSentenceDetectorFeatures() {
    if (sentenceFeatures == null) {
        try {
            if (sentenceFeaturePath != null) {
                LOG.debug("Found setting to change sentence detector features");
                File sentenceFeatureFile = this.getFile(sentenceFeaturePath);
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                // try-with-resources closes the scanner, and with it the underlying file
                // stream, even when reading fails part-way through.
                try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(sentenceFeatureFile), this.getInputCharset())))) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                    // Scanner swallows read errors and reports end-of-input instead;
                    // surface them rather than silently using a truncated descriptor list.
                    if (scanner.ioException() != null) {
                        throw scanner.ioException();
                    }
                }
                sentenceFeatures = this.getSentenceDetectorFeatureService().getFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return sentenceFeatures;
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the pos-tagger features, loading them on first access.
 * <p>
 * When {@code posTaggerFeaturePath} is set and no features are cached yet, every line of
 * the referenced file is read as a feature descriptor (decoded with
 * {@code getInputCharset()}), the descriptors are turned into a feature set by the
 * pos-tagger feature service, and they are also stored in the descriptor map under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set; {@code null} if nothing is cached and no feature path is set
 * @throws RuntimeException wrapping any exception raised while reading or parsing the file
 */
@Override
public Set<PosTaggerFeature<?>> getPosTaggerFeatures() {
    if (posTaggerFeatures == null) {
        try {
            if (posTaggerFeaturePath != null) {
                LOG.debug("Found setting to change pos-tagger features");
                File posTaggerFeatureFile = this.getFile(posTaggerFeaturePath);
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                // try-with-resources closes the scanner, and with it the underlying file
                // stream, even when reading fails part-way through.
                try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(posTaggerFeatureFile), this.getInputCharset())))) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                    // Scanner swallows read errors and reports end-of-input instead;
                    // surface them rather than silently using a truncated descriptor list.
                    if (scanner.ioException() != null) {
                        throw scanner.ioException();
                    }
                }
                posTaggerFeatures = this.getPosTaggerFeatureService().getFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return posTaggerFeatures;
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the token pattern match features, loading them on first access.
 * <p>
 * When {@code tokeniserFeaturePath} is set and no features are cached yet, every line of
 * the referenced file is read as a feature descriptor (decoded with
 * {@code getInputCharset()}), the descriptors are turned into a feature set by the
 * token feature service, and they are also stored in the descriptor map under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set; {@code null} if nothing is cached and no feature path is set
 * @throws RuntimeException wrapping any exception raised while reading or parsing the file
 */
@Override
public Set<TokenPatternMatchFeature<?>> getTokenPatternMatchFeatures() {
    if (tokenPatternMatchFeatures == null) {
        try {
            if (tokeniserFeaturePath != null) {
                LOG.debug("Found setting to change token pattern match features");
                File tokeniserFeatureFile = this.getFile(tokeniserFeaturePath);
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                // try-with-resources closes the scanner, and with it the underlying file
                // stream, even when reading fails part-way through.
                try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(tokeniserFeatureFile), this.getInputCharset())))) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                    // Scanner swallows read errors and reports end-of-input instead;
                    // surface them rather than silently using a truncated descriptor list.
                    if (scanner.ioException() != null) {
                        throw scanner.ioException();
                    }
                }
                tokenPatternMatchFeatures = this.getTokenFeatureService()
                        .getTokenPatternMatchFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokenPatternMatchFeatures;
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the tokeniser context features, loading them on first access.
 * <p>
 * When {@code tokeniserFeaturePath} is set and no features are cached yet, every line of
 * the referenced file is read as a feature descriptor (decoded with
 * {@code getInputCharset()}). The descriptors, together with the parsed test patterns of
 * the tokeniser pattern manager, are turned into a feature set by the token feature
 * service; the descriptors are also stored in the descriptor map under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set; {@code null} if nothing is cached and no feature path is set
 * @throws RuntimeException wrapping any exception raised while reading or parsing the file
 */
@Override
public Set<TokeniserContextFeature<?>> getTokeniserContextFeatures() {
    if (tokeniserContextFeatures == null) {
        try {
            if (tokeniserFeaturePath != null) {
                TokeniserPatternManager tokeniserPatternManager = this.getTokeniserPatternManager();
                LOG.debug("Found setting to change tokeniser context features");
                File tokeniserFeatureFile = this.getFile(tokeniserFeaturePath);
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                // try-with-resources closes the scanner, and with it the underlying file
                // stream, even when reading fails part-way through.
                try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(tokeniserFeatureFile), this.getInputCharset())))) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                    // Scanner swallows read errors and reports end-of-input instead;
                    // surface them rather than silently using a truncated descriptor list.
                    if (scanner.ioException() != null) {
                        throw scanner.ioException();
                    }
                }
                tokeniserContextFeatures = this.getTokenFeatureService().getTokeniserContextFeatureSet(
                        featureDescriptors, tokeniserPatternManager.getParsedTestPatterns());
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokeniserContextFeatures;
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * Returns the language detector corpus reader, building it on first access.
 * <p>
 * The file at {@code languageCorpusMapPath} is read line by line; each line is split on
 * a tab into a language tag (first column) and a corpus path (second column). A reader
 * is opened on every corpus file and handed, keyed by locale, to the language detector
 * service to obtain the default reader. Corpus reader attributes are (re)applied on
 * every call, including when the reader is already cached.
 *
 * @return the cached or newly built corpus reader
 * @throws RuntimeException wrapping any {@code IOException} raised while reading the map
 *         file or opening a corpus file
 */
public LanguageDetectorAnnotatedCorpusReader getLanguageCorpusReader() {
    try {
        if (languageCorpusReader == null) {
            File languageCorpusMapFile = this.getFile(languageCorpusMapPath);
            Map<Locale, Reader> languageMap = new HashMap<Locale, Reader>();

            // try-with-resources closes the map-file scanner even if a line is malformed
            // or a corpus file cannot be opened; the corpus readers themselves stay open
            // because they are handed on to the corpus reader built below.
            try (Scanner languageCorpusMapScanner = new Scanner(new BufferedReader(new InputStreamReader(
                    new FileInputStream(languageCorpusMapFile), this.getInputCharset().name())))) {
                while (languageCorpusMapScanner.hasNextLine()) {
                    String line = languageCorpusMapScanner.nextLine();
                    String[] parts = line.split("\t");
                    Locale locale = Locale.forLanguageTag(parts[0]);
                    String corpusPath = parts[1];
                    File corpusFile = this.getFile(corpusPath);
                    Reader corpusReader = new BufferedReader(
                            new InputStreamReader(new FileInputStream(corpusFile), this.getInputCharset().name()));
                    languageMap.put(locale, corpusReader);
                }
                // Scanner swallows read errors and reports end-of-input instead;
                // surface them rather than silently using a truncated language map.
                if (languageCorpusMapScanner.ioException() != null) {
                    throw languageCorpusMapScanner.ioException();
                }
            }
            languageCorpusReader = this.getLanguageDetectorService().getDefaultReader(languageMap);
        }
        this.setCorpusReaderAttributes(languageCorpusReader);
        return languageCorpusReader;
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * A regex used to process the input, when pre-annotated.
 * <p>
 * On first access, if {@code inputPatternFilePath} is non-empty, the regex is taken from
 * the first line of that file (decoded with {@code getInputCharset()}) and cached.
 *
 * @return the input regex; {@code null} if none is cached and no pattern file path is set
 * @throws RuntimeException wrapping any exception raised while reading the file, including
 *         the {@code TalismaneException} raised when the file contains no line
 */
@Override
public String getInputRegex() {
    try {
        if (inputRegex == null && inputPatternFilePath != null && inputPatternFilePath.length() > 0) {
            File inputPatternFile = this.getFile(inputPatternFilePath);
            // try-with-resources closes the scanner (and the file stream) even when
            // reading throws, which the explicit close() on the success path did not.
            try (Scanner inputPatternScanner = new Scanner(new BufferedReader(new InputStreamReader(
                    new FileInputStream(inputPatternFile), this.getInputCharset().name())))) {
                if (inputPatternScanner.hasNextLine()) {
                    inputRegex = inputPatternScanner.nextLine();
                }
                // Scanner swallows read errors; report them instead of a misleading
                // "no input pattern found".
                if (inputPatternScanner.ioException() != null) {
                    throw inputPatternScanner.ioException();
                }
            }
            if (inputRegex == null) {
                throw new TalismaneException("No input pattern found in " + inputPatternFilePath);
            }
        }
        return inputRegex;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * Text marker filters are applied to raw text segments extracted from the stream, 3 segments at a time.
 * This means that if a particular marker crosses segment borders, it is handled correctly.
 * @return/* w  w  w . ja v a  2  s. c  o m*/
 */
public List<TextMarkerFilter> getTextMarkerFilters() {
    try {
        if (textMarkerFilters == null) {
            textMarkerFilters = new ArrayList<TextMarkerFilter>();

            // insert sentence breaks at end of block
            this.addTextMarkerFilter(this.getFilterService().getRegexMarkerFilter(
                    new MarkerFilterType[] { MarkerFilterType.SENTENCE_BREAK }, "" + endBlockCharacter,
                    blockSize));

            // handle newline as requested
            if (newlineMarker.equals(MarkerFilterType.SENTENCE_BREAK))
                this.addTextMarkerFilter(this.getFilterService().getNewlineEndOfSentenceMarker());
            else if (newlineMarker.equals(MarkerFilterType.SPACE))
                this.addTextMarkerFilter(this.getFilterService().getNewlineSpaceMarker());

            // get rid of duplicate white-space always
            this.addTextMarkerFilter(this.getFilterService().getDuplicateWhiteSpaceFilter());

            for (int i = 0; i <= 1; i++) {
                LOG.debug("Text marker filters");
                Scanner textFilterScanner = null;
                if (i == 0) {
                    if (textFiltersPath != null && textFiltersPath.length() > 0) {
                        LOG.debug("From: " + textFiltersPath);
                        File textFilterFile = new File(textFiltersPath);
                        textFilterScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                new FileInputStream(textFilterFile), this.getInputCharset().name())));
                    }
                } else {
                    LOG.debug("From default");
                    textFilterScanner = this.implementation.getDefaultTextMarkerFiltersScanner();
                }
                if (textFilterScanner != null) {
                    while (textFilterScanner.hasNextLine()) {
                        String descriptor = textFilterScanner.nextLine();
                        LOG.debug(descriptor);
                        if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                            TextMarkerFilter textMarkerFilter = this.getFilterService()
                                    .getTextMarkerFilter(descriptor, blockSize);
                            this.addTextMarkerFilter(textMarkerFilter);
                        }
                    }
                }
            }

        }
        return textMarkerFilters;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfigImpl.java

/**
 * A regex used to process the evaluation corpus.
 * <p>
 * On first access, if {@code evaluationPatternFilePath} is non-empty, the regex is taken
 * from the first line of that file (decoded with {@code getInputCharset()}); otherwise
 * it falls back to {@code getInputRegex()}.
 *
 * @return the evaluation regex; may be {@code null} if the fallback input regex is {@code null}
 * @throws RuntimeException wrapping any exception raised while reading the file, including
 *         the {@code TalismaneException} raised when the file contains no line
 */
@Override
public String getEvaluationRegex() {
    try {
        if (evaluationRegex == null) {
            if (evaluationPatternFilePath != null && evaluationPatternFilePath.length() > 0) {
                File evaluationPatternFile = this.getFile(evaluationPatternFilePath);
                // try-with-resources closes the scanner (and the file stream) even when
                // reading throws, which the explicit close() on the success path did not.
                try (Scanner evaluationPatternScanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(evaluationPatternFile), this.getInputCharset().name())))) {
                    if (evaluationPatternScanner.hasNextLine()) {
                        evaluationRegex = evaluationPatternScanner.nextLine();
                    }
                    // Scanner swallows read errors; report them instead of a misleading
                    // "no evaluation pattern found".
                    if (evaluationPatternScanner.ioException() != null) {
                        throw evaluationPatternScanner.ioException();
                    }
                }
                if (evaluationRegex == null) {
                    throw new TalismaneException("No evaluation pattern found in " + evaluationPatternFilePath);
                }
            } else {
                evaluationRegex = this.getInputRegex();
            }
        }
        return evaluationRegex;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.joliciel.talismane.TalismaneConfig.java

/**
 * The rules to apply when running the pos-tagger.
 * @return/*from  ww w  .  java  2 s  .c om*/
 */
public List<PosTaggerRule> getPosTaggerRules() {
    try {
        if (posTaggerRules == null) {
            posTaggerRules = new ArrayList<PosTaggerRule>();
            for (int i = 0; i <= 1; i++) {
                Scanner rulesScanner = null;
                if (i == 0) {
                    if (posTaggerRulesReplace)
                        continue;
                    rulesScanner = this.implementation.getDefaultPosTaggerRulesScanner();
                } else {
                    if (posTaggerRuleFilePath != null && posTaggerRuleFilePath.length() > 0) {
                        File posTaggerRuleFile = new File(posTaggerRuleFilePath);
                        rulesScanner = new Scanner(new BufferedReader(new InputStreamReader(
                                new FileInputStream(posTaggerRuleFile), this.getInputCharset().name())));
                    }
                }

                if (rulesScanner != null) {
                    List<String> ruleDescriptors = new ArrayList<String>();
                    while (rulesScanner.hasNextLine()) {
                        String ruleDescriptor = rulesScanner.nextLine();
                        if (ruleDescriptor.length() > 0) {
                            ruleDescriptors.add(ruleDescriptor);
                            LOG.trace(ruleDescriptor);
                        }
                    }
                    List<PosTaggerRule> rules = this.getPosTaggerFeatureService().getRules(ruleDescriptors);
                    posTaggerRules.addAll(rules);

                }
            }
        }
        return posTaggerRules;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}