List of usage examples for java.util.Scanner.hasNextLine()
public boolean hasNextLine()
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
@Override public Set<LanguageDetectorFeature<?>> getLanguageDetectorFeatures() { if (languageFeatures == null) { try {// www. j a v a2s. c om if (languageFeaturePath != null) { LOG.debug("Found setting to change language detector features"); File languageFeatureFile = this.getFile(languageFeaturePath); Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader( new FileInputStream(languageFeatureFile), this.getInputCharset()))); List<String> featureDescriptors = new ArrayListNoNulls<String>(); while (scanner.hasNextLine()) { String descriptor = scanner.nextLine(); featureDescriptors.add(descriptor); LOG.debug(descriptor); } languageFeatures = this.getLanguageDetectorService().getFeatureSet(featureDescriptors); this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors); } } catch (Exception e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } } return languageFeatures; }
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
@Override public Set<SentenceDetectorFeature<?>> getSentenceDetectorFeatures() { if (sentenceFeatures == null) { try {//w ww .ja v a 2s . c o m if (sentenceFeaturePath != null) { LOG.debug("Found setting to change sentence detector features"); File sentenceFeatureFile = this.getFile(sentenceFeaturePath); Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader( new FileInputStream(sentenceFeatureFile), this.getInputCharset()))); List<String> featureDescriptors = new ArrayListNoNulls<String>(); while (scanner.hasNextLine()) { String descriptor = scanner.nextLine(); featureDescriptors.add(descriptor); LOG.debug(descriptor); } sentenceFeatures = this.getSentenceDetectorFeatureService().getFeatureSet(featureDescriptors); this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors); } } catch (Exception e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } } return sentenceFeatures; }
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
@Override public Set<PosTaggerFeature<?>> getPosTaggerFeatures() { if (posTaggerFeatures == null) { try {//from ww w.j a v a 2 s. c o m if (posTaggerFeaturePath != null) { LOG.debug("Found setting to change pos-tagger features"); File posTaggerFeatureFile = this.getFile(posTaggerFeaturePath); Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader( new FileInputStream(posTaggerFeatureFile), this.getInputCharset()))); List<String> featureDescriptors = new ArrayListNoNulls<String>(); while (scanner.hasNextLine()) { String descriptor = scanner.nextLine(); featureDescriptors.add(descriptor); LOG.debug(descriptor); } posTaggerFeatures = this.getPosTaggerFeatureService().getFeatureSet(featureDescriptors); this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors); } } catch (Exception e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } } return posTaggerFeatures; }
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
/**
 * Returns the token pattern match features, loading them on first access.
 * <p>
 * If {@code tokenPatternMatchFeatures} has not been set yet and
 * {@code tokeniserFeaturePath} is configured, every line of that file is read as
 * one feature descriptor (decoded with the configured input charset). The
 * descriptors are converted to a feature set by the token feature service and
 * are also stored in the descriptor map under
 * {@code MachineLearningModel.FEATURE_DESCRIPTOR_KEY}.
 *
 * @return the cached feature set; {@code null} if nothing was loaded and no
 *         feature path is configured
 * @throws RuntimeException wrapping any exception raised while reading the file
 *         or building the feature set
 */
@Override
public Set<TokenPatternMatchFeature<?>> getTokenPatternMatchFeatures() {
    if (tokenPatternMatchFeatures == null) {
        try {
            if (tokeniserFeaturePath != null) {
                LOG.debug("Found setting to change token pattern match features");
                File tokeniserFeatureFile = this.getFile(tokeniserFeaturePath);
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                // try-with-resources: the scanner (and the file stream beneath it)
                // is closed even if reading fails part-way through.
                try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(tokeniserFeatureFile), this.getInputCharset())))) {
                    while (scanner.hasNextLine()) {
                        String descriptor = scanner.nextLine();
                        featureDescriptors.add(descriptor);
                        LOG.debug(descriptor);
                    }
                }
                tokenPatternMatchFeatures = this.getTokenFeatureService()
                        .getTokenPatternMatchFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokenPatternMatchFeatures;
}
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
@Override public Set<TokeniserContextFeature<?>> getTokeniserContextFeatures() { if (tokeniserContextFeatures == null) { try {//from www .j a v a 2s . c om if (tokeniserFeaturePath != null) { TokeniserPatternManager tokeniserPatternManager = this.getTokeniserPatternManager(); LOG.debug("Found setting to change tokeniser context features"); File tokeniserFeatureFile = this.getFile(tokeniserFeaturePath); Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader( new FileInputStream(tokeniserFeatureFile), this.getInputCharset()))); List<String> featureDescriptors = new ArrayListNoNulls<String>(); while (scanner.hasNextLine()) { String descriptor = scanner.nextLine(); featureDescriptors.add(descriptor); LOG.debug(descriptor); } tokeniserContextFeatures = this.getTokenFeatureService().getTokeniserContextFeatureSet( featureDescriptors, tokeniserPatternManager.getParsedTestPatterns()); this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors); } } catch (Exception e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } } return tokeniserContextFeatures; }
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
public LanguageDetectorAnnotatedCorpusReader getLanguageCorpusReader() { try {//w ww . j av a 2 s . co m if (languageCorpusReader == null) { File languageCorpusMapFile = this.getFile(languageCorpusMapPath); Scanner languageCorpusMapScanner = new Scanner(new BufferedReader(new InputStreamReader( new FileInputStream(languageCorpusMapFile), this.getInputCharset().name()))); Map<Locale, Reader> languageMap = new HashMap<Locale, Reader>(); while (languageCorpusMapScanner.hasNextLine()) { String line = languageCorpusMapScanner.nextLine(); String[] parts = line.split("\t"); Locale locale = Locale.forLanguageTag(parts[0]); String corpusPath = parts[1]; File corpusFile = this.getFile(corpusPath); Reader corpusReader = new BufferedReader( new InputStreamReader(new FileInputStream(corpusFile), this.getInputCharset().name())); languageMap.put(locale, corpusReader); } languageCorpusMapScanner.close(); languageCorpusReader = this.getLanguageDetectorService().getDefaultReader(languageMap); } this.setCorpusReaderAttributes(languageCorpusReader); return languageCorpusReader; } catch (IOException e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } }
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
/**
 * A regex used to process the input, when pre-annotated.
 * <p>
 * If {@code inputRegex} has not been set and {@code inputPatternFilePath} is
 * non-empty, the regex is taken from the first line of that file (decoded with
 * the configured input charset) and cached.
 *
 * @return the input regex; {@code null} if none is set and no pattern file is
 *         configured
 * @throws RuntimeException wrapping any exception raised while reading the file,
 *         including the {@code TalismaneException} thrown when the file has no
 *         lines
 */
@Override
public String getInputRegex() {
    try {
        if (inputRegex == null && inputPatternFilePath != null && inputPatternFilePath.length() > 0) {
            File inputPatternFile = this.getFile(inputPatternFilePath);
            // try-with-resources: the scanner is closed even if reading fails.
            try (Scanner inputPatternScanner = new Scanner(new BufferedReader(new InputStreamReader(
                    new FileInputStream(inputPatternFile), this.getInputCharset().name())))) {
                if (inputPatternScanner.hasNextLine()) {
                    inputRegex = inputPatternScanner.nextLine();
                }
            }
            if (inputRegex == null) {
                throw new TalismaneException("No input pattern found in " + inputPatternFilePath);
            }
        }
        return inputRegex;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
From source file:com.joliciel.talismane.TalismaneConfig.java
/** * Text marker filters are applied to raw text segments extracted from the stream, 3 segments at a time. * This means that if a particular marker crosses segment borders, it is handled correctly. * @return/* w w w . ja v a 2 s. c o m*/ */ public List<TextMarkerFilter> getTextMarkerFilters() { try { if (textMarkerFilters == null) { textMarkerFilters = new ArrayList<TextMarkerFilter>(); // insert sentence breaks at end of block this.addTextMarkerFilter(this.getFilterService().getRegexMarkerFilter( new MarkerFilterType[] { MarkerFilterType.SENTENCE_BREAK }, "" + endBlockCharacter, blockSize)); // handle newline as requested if (newlineMarker.equals(MarkerFilterType.SENTENCE_BREAK)) this.addTextMarkerFilter(this.getFilterService().getNewlineEndOfSentenceMarker()); else if (newlineMarker.equals(MarkerFilterType.SPACE)) this.addTextMarkerFilter(this.getFilterService().getNewlineSpaceMarker()); // get rid of duplicate white-space always this.addTextMarkerFilter(this.getFilterService().getDuplicateWhiteSpaceFilter()); for (int i = 0; i <= 1; i++) { LOG.debug("Text marker filters"); Scanner textFilterScanner = null; if (i == 0) { if (textFiltersPath != null && textFiltersPath.length() > 0) { LOG.debug("From: " + textFiltersPath); File textFilterFile = new File(textFiltersPath); textFilterScanner = new Scanner(new BufferedReader(new InputStreamReader( new FileInputStream(textFilterFile), this.getInputCharset().name()))); } } else { LOG.debug("From default"); textFilterScanner = this.implementation.getDefaultTextMarkerFiltersScanner(); } if (textFilterScanner != null) { while (textFilterScanner.hasNextLine()) { String descriptor = textFilterScanner.nextLine(); LOG.debug(descriptor); if (descriptor.length() > 0 && !descriptor.startsWith("#")) { TextMarkerFilter textMarkerFilter = this.getFilterService() .getTextMarkerFilter(descriptor, blockSize); this.addTextMarkerFilter(textMarkerFilter); } } } } } return textMarkerFilters; } catch (Exception e) { LogUtils.logError(LOG, 
e); throw new RuntimeException(e); } }
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
/**
 * A regex used to process the evaluation corpus.
 * <p>
 * If {@code evaluationRegex} has not been set, it is taken from the first line
 * of the file at {@code evaluationPatternFilePath} when that path is non-empty
 * (decoded with the configured input charset). Otherwise it falls back to
 * {@link #getInputRegex()}.
 *
 * @return the evaluation regex; may be {@code null} when the fallback to
 *         {@link #getInputRegex()} returns {@code null}
 * @throws RuntimeException wrapping any exception raised while reading the file,
 *         including the {@code TalismaneException} thrown when the file has no
 *         lines
 */
@Override
public String getEvaluationRegex() {
    try {
        if (evaluationRegex == null) {
            if (evaluationPatternFilePath != null && evaluationPatternFilePath.length() > 0) {
                File evaluationPatternFile = this.getFile(evaluationPatternFilePath);
                // try-with-resources: the scanner is closed even if reading fails.
                try (Scanner evaluationPatternScanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(evaluationPatternFile), this.getInputCharset().name())))) {
                    if (evaluationPatternScanner.hasNextLine()) {
                        evaluationRegex = evaluationPatternScanner.nextLine();
                    }
                }
                if (evaluationRegex == null) {
                    throw new TalismaneException("No evaluation pattern found in " + evaluationPatternFilePath);
                }
            } else {
                evaluationRegex = this.getInputRegex();
            }
        }
        return evaluationRegex;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
From source file:com.joliciel.talismane.TalismaneConfig.java
/** * The rules to apply when running the pos-tagger. * @return/*from ww w . java 2 s .c om*/ */ public List<PosTaggerRule> getPosTaggerRules() { try { if (posTaggerRules == null) { posTaggerRules = new ArrayList<PosTaggerRule>(); for (int i = 0; i <= 1; i++) { Scanner rulesScanner = null; if (i == 0) { if (posTaggerRulesReplace) continue; rulesScanner = this.implementation.getDefaultPosTaggerRulesScanner(); } else { if (posTaggerRuleFilePath != null && posTaggerRuleFilePath.length() > 0) { File posTaggerRuleFile = new File(posTaggerRuleFilePath); rulesScanner = new Scanner(new BufferedReader(new InputStreamReader( new FileInputStream(posTaggerRuleFile), this.getInputCharset().name()))); } } if (rulesScanner != null) { List<String> ruleDescriptors = new ArrayList<String>(); while (rulesScanner.hasNextLine()) { String ruleDescriptor = rulesScanner.nextLine(); if (ruleDescriptor.length() > 0) { ruleDescriptors.add(ruleDescriptor); LOG.trace(ruleDescriptor); } } List<PosTaggerRule> rules = this.getPosTaggerFeatureService().getRules(ruleDescriptors); posTaggerRules.addAll(rules); } } } return posTaggerRules; } catch (Exception e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } }