List of usage examples for java.util.Scanner.hasNextLine()
public boolean hasNextLine()
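hasNextLine() returns true if there is another line in this scanner's input; it may block while waiting for input, and it does not advance past any of it. Before the real-world examples below, here is a minimal sketch of the canonical read loop, reading from standard input purely for illustration.

import java.util.Scanner;

public class HasNextLineDemo {
    public static void main(String[] args) {
        // Read standard input line by line until the stream is exhausted.
        try (Scanner scanner = new Scanner(System.in)) {
            while (scanner.hasNextLine()) {        // blocks until a line or end of input is available
                String line = scanner.nextLine();  // consumes the line, without its terminator
                System.out.println("Read: " + line);
            }
        }
    }
}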
From source file:com.joliciel.talismane.TalismaneConfig.java
public Set<PosTaggerFeature<?>> getPosTaggerFeatures() {
    if (posTaggerFeatures == null) {
        try {
            if (posTaggerFeaturePath != null) {
                LOG.debug("Found setting to change pos-tagger features");
                File posTaggerFeatureFile = new File(posTaggerFeaturePath);
                Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(posTaggerFeatureFile), this.getInputCharset())));
                List<String> featureDescriptors = new ArrayList<String>();
                while (scanner.hasNextLine()) {
                    String descriptor = scanner.nextLine();
                    featureDescriptors.add(descriptor);
                    LOG.debug(descriptor);
                }
                posTaggerFeatures = this.getPosTaggerFeatureService().getFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return posTaggerFeatures;
}
From source file:com.joliciel.talismane.TalismaneConfig.java
public Set<TokenPatternMatchFeature<?>> getTokenPatternMatchFeatures() {
    if (tokenPatternMatchFeatures == null) {
        try {
            if (tokeniserFeaturePath != null) {
                LOG.debug("Found setting to change token pattern match features");
                File tokeniserFeatureFile = new File(tokeniserFeaturePath);
                Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                List<String> featureDescriptors = new ArrayList<String>();
                while (scanner.hasNextLine()) {
                    String descriptor = scanner.nextLine();
                    featureDescriptors.add(descriptor);
                    LOG.debug(descriptor);
                }
                tokenPatternMatchFeatures = this.getTokenFeatureService()
                        .getTokenPatternMatchFeatureSet(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokenPatternMatchFeatures;
}
From source file:com.joliciel.talismane.TalismaneConfig.java
public Set<TokeniserContextFeature<?>> getTokeniserContextFeatures() {
    if (tokeniserContextFeatures == null) {
        try {
            if (tokeniserFeaturePath != null) {
                TokeniserPatternManager tokeniserPatternManager = this.getTokeniserPatternManager();
                LOG.debug("Found setting to change tokeniser context features");
                File tokeniserFeatureFile = new File(tokeniserFeaturePath);
                Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(
                        new FileInputStream(tokeniserFeatureFile), this.getInputCharset())));
                List<String> featureDescriptors = new ArrayList<String>();
                while (scanner.hasNextLine()) {
                    String descriptor = scanner.nextLine();
                    featureDescriptors.add(descriptor);
                    LOG.debug(descriptor);
                }
                tokeniserContextFeatures = this.getTokenFeatureService().getTokeniserContextFeatureSet(
                        featureDescriptors, tokeniserPatternManager.getParsedTestPatterns());
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokeniserContextFeatures;
}
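The three TalismaneConfig getters above share one pattern: open a Scanner over a feature-descriptor file, collect every line with hasNextLine()/nextLine(), then hand the descriptors to a feature service; none of them closes the Scanner. A condensed sketch of that reading step using try-with-resources is shown below; the class name, method name and UTF-8 charset are placeholders, not taken from Talismane.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

public class FeatureDescriptorReader {
    // Reads one descriptor per line; the Scanner (and the underlying stream) is closed automatically.
    public static List<String> readDescriptors(File featureFile) throws IOException {
        List<String> descriptors = new ArrayList<>();
        try (Scanner scanner = new Scanner(new BufferedReader(
                new InputStreamReader(new FileInputStream(featureFile), StandardCharsets.UTF_8)))) {
            while (scanner.hasNextLine()) {
                descriptors.add(scanner.nextLine());
            }
        }
        return descriptors;
    }
}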
From source file:io.realm.RealmTests.java
private List<String> getCharacterArray() {
    List<String> chars_array = new ArrayList<String>();
    String file = "assets/unicode_codepoints.csv";
    Scanner scanner = new Scanner(getClass().getClassLoader().getResourceAsStream(file), "UTF-8");
    int i = 0;
    String currentUnicode = null;
    try {
        while (scanner.hasNextLine()) {
            currentUnicode = scanner.nextLine();
            char[] chars = Character.toChars(Integer.parseInt(currentUnicode, 16));
            String codePoint = new String(chars);
            chars_array.add(codePoint);
            i++;
        }
    } catch (Exception e) {
        fail("Failure, Codepoint: " + i + " / " + currentUnicode + " " + e.getMessage());
    }
    return chars_array;
}
From source file:io.realm.RealmTests.java
@Test
public void utf8Tests() {
    realm.beginTransaction();
    realm.delete(AllTypes.class);
    realm.commitTransaction();

    String file = "assets/unicode_codepoints.csv";
    Scanner scanner = new Scanner(getClass().getClassLoader().getResourceAsStream(file), "UTF-8");
    int i = 0;
    String currentUnicode = null;
    try {
        realm.beginTransaction();
        while (scanner.hasNextLine()) {
            currentUnicode = scanner.nextLine();
            char[] chars = Character.toChars(Integer.parseInt(currentUnicode, 16));
            String codePoint = new String(chars);
            AllTypes o = realm.createObject(AllTypes.class);
            o.setColumnLong(i);
            o.setColumnString(codePoint);
            AllTypes realmType = realm.where(AllTypes.class).equalTo("columnLong", i).findFirst();
            if (i > 1) {
                assertEquals("Codepoint: " + i + " / " + currentUnicode, codePoint,
                        realmType.getColumnString()); // codepoint 0 is NULL, ignore for now.
            }
            i++;
        }
        realm.commitTransaction();
    } catch (Exception e) {
        fail("Failure, Codepoint: " + i + " / " + currentUnicode + " " + e.getMessage());
    }
}
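Both Realm tests above use hasNextLine() to walk a CSV of hexadecimal code points bundled as a classpath resource. A self-contained sketch of that decoding loop follows; the resource path matches the tests, while the null check and blank-line skip are added assumptions rather than part of the original code.

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

public class CodePointReader {
    // Converts hexadecimal code points (one per line) into their String representations.
    public static List<String> readCodePoints(String resourcePath) {
        List<String> result = new ArrayList<>();
        InputStream in = CodePointReader.class.getClassLoader().getResourceAsStream(resourcePath);
        if (in == null) {
            throw new IllegalArgumentException("Resource not found: " + resourcePath);
        }
        try (Scanner scanner = new Scanner(in, "UTF-8")) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine().trim();
                if (line.isEmpty()) {
                    continue; // skip blank lines (assumption; the tests expect one code point per line)
                }
                result.add(new String(Character.toChars(Integer.parseInt(line, 16))));
            }
        }
        return result;
    }

    public static void main(String[] args) {
        readCodePoints("assets/unicode_codepoints.csv").forEach(System.out::println);
    }
}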
From source file:com.joliciel.talismane.TalismaneConfig.java
void loadParameters(Map<String, String> args) throws Exception { if (args.size() == 0) { System.out.println("Talismane usage instructions: "); System.out.println("* indicates optional, + indicates default value"); System.out.println(""); System.out.println(/*w w w . j a va 2s . c o m*/ "Usage: command=analyse *startModule=[sentence+|tokenise|postag|parse] *endModule=[sentence|tokenise|postag|parse+] *inFile=[inFilePath, stdin if missing] *outFile=[outFilePath, stdout if missing] *template=[outputTemplatePath]"); System.out.println(""); System.out.println("Additional optional parameters:"); System.out.println( " *encoding=[UTF-8, ...] *includeDetails=[true|false+] posTaggerRules*=[posTaggerRuleFilePath] textFilters*=[regexFilterFilePath] *sentenceModel=[path] *tokeniserModel=[path] *posTaggerModel=[path] *parserModel=[path] *inputPatternFile=[inputPatternFilePath] *posTagSet=[posTagSetPath]"); return; } String logConfigPath = args.get("logConfigFile"); if (logConfigPath != null) { args.remove("logConfigFile"); Properties props = new Properties(); props.load(new FileInputStream(logConfigPath)); PropertyConfigurator.configure(props); } String performanceConifPath = args.get("performanceConfigFile"); if (performanceConifPath != null) { args.remove("performanceConfigFile"); performanceConfigFile = new File(performanceConifPath); } String encoding = null; String inputEncoding = null; String outputEncoding = null; String builtInTemplate = null; String posTagSetPath = null; String externalResourcePath = null; String transitionSystemStr = null; for (Entry<String, String> arg : args.entrySet()) { String argName = arg.getKey(); String argValue = arg.getValue(); if (argName.equals("command")) { String commandString = argValue; if (commandString.equals("analyze")) commandString = "analyse"; command = Command.valueOf(commandString); } else if (argName.equals("option")) { option = Option.valueOf(argValue); } else if (argName.equals("mode")) { mode = Mode.valueOf(argValue); } else if (argName.equals("module")) { if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector")) module = Talismane.Module.SentenceDetector; else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser")) module = Talismane.Module.Tokeniser; else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger")) module = Talismane.Module.PosTagger; else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser")) module = Talismane.Module.Parser; else throw new TalismaneException("Unknown module: " + argValue); } else if (argName.equals("startModule")) { if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector")) startModule = Talismane.Module.SentenceDetector; else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser")) startModule = Talismane.Module.Tokeniser; else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger")) startModule = Talismane.Module.PosTagger; else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser")) startModule = Talismane.Module.Parser; else throw new TalismaneException("Unknown startModule: " + argValue); } else if (argName.equals("endModule")) { if (argValue.equalsIgnoreCase("sentence") || argValue.equalsIgnoreCase("sentenceDetector")) endModule = Talismane.Module.SentenceDetector; else if (argValue.equalsIgnoreCase("tokenise") || argValue.equalsIgnoreCase("tokeniser")) endModule = 
Talismane.Module.Tokeniser; else if (argValue.equalsIgnoreCase("postag") || argValue.equalsIgnoreCase("posTagger")) endModule = Talismane.Module.PosTagger; else if (argValue.equalsIgnoreCase("parse") || argValue.equalsIgnoreCase("parser")) endModule = Talismane.Module.Parser; else throw new TalismaneException("Unknown endModule: " + argValue); } else if (argName.equals("inFile")) inFilePath = argValue; else if (argName.equals("outFile")) outFilePath = argValue; else if (argName.equals("outDir")) outDirPath = argValue; else if (argName.equals("template")) templatePath = argValue; else if (argName.equals("builtInTemplate")) builtInTemplate = argValue; else if (argName.equals("encoding")) { if (inputEncoding != null || outputEncoding != null) throw new TalismaneException( "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'"); encoding = argValue; } else if (argName.equals("inputEncoding")) { if (encoding != null) throw new TalismaneException( "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'"); inputEncoding = argValue; } else if (argName.equals("outputEncoding")) { if (encoding != null) throw new TalismaneException( "The parameter 'encoding' cannot be used with 'inputEncoding' or 'outputEncoding'"); outputEncoding = argValue; } else if (argName.equals("includeDetails")) includeDetails = argValue.equalsIgnoreCase("true"); else if (argName.equals("propagateBeam")) propagateBeam = argValue.equalsIgnoreCase("true"); else if (argName.equals("beamWidth")) beamWidth = Integer.parseInt(argValue); else if (argName.equals("sentenceModel")) sentenceModelFilePath = argValue; else if (argName.equals("tokeniserModel")) tokeniserModelFilePath = argValue; else if (argName.equals("posTaggerModel")) posTaggerModelFilePath = argValue; else if (argName.equals("parserModel")) parserModelFilePath = argValue; else if (argName.equals("inputPatternFile")) inputPatternFilePath = argValue; else if (argName.equals("inputPattern")) inputRegex = argValue; else if (argName.equals("evaluationPatternFile")) evaluationPatternFilePath = argValue; else if (argName.equals("evaluationPattern")) evaluationRegex = argValue; else if (argName.equals("posTaggerRules")) { if (argValue.startsWith("replace:")) { posTaggerRulesReplace = true; posTaggerRuleFilePath = argValue.substring("replace:".length()); } else { posTaggerRuleFilePath = argValue; } } else if (argName.equals("parserRules")) { if (argValue.startsWith("replace:")) { parserRulesReplace = true; parserRuleFilePath = argValue.substring("replace:".length()); } else { parserRuleFilePath = argValue; } } else if (argName.equals("posTagSet")) posTagSetPath = argValue; else if (argName.equals("textFilters")) textFiltersPath = argValue; else if (argName.equals("tokenFilters")) tokenFiltersPath = argValue; else if (argName.equals("tokenSequenceFilters")) tokenSequenceFilterPath = argValue; else if (argName.equals("posTagSequenceFilters")) posTagSequenceFilterPath = argValue; else if (argName.equals("logStats")) logStats = argValue.equalsIgnoreCase("true"); else if (argName.equals("newline")) newlineMarker = MarkerFilterType.valueOf(argValue); else if (argName.equals("fileName")) fileName = argValue; else if (argName.equals("processByDefault")) processByDefault = argValue.equalsIgnoreCase("true"); else if (argName.equals("maxParseAnalysisTime")) maxParseAnalysisTime = Integer.parseInt(argValue); else if (argName.equals("minFreeMemory")) minFreeMemory = Integer.parseInt(argValue); else if (argName.equals("transitionSystem")) 
transitionSystemStr = argValue; else if (argName.equals("sentenceCount")) maxSentenceCount = Integer.parseInt(argValue); else if (argName.equals("endBlockCharCode")) endBlockCharacter = (char) Integer.parseInt(argValue); else if (argName.equals("outputGuesses")) outputGuesses = argValue.equalsIgnoreCase("true"); else if (argName.equals("outputGuessCount")) outputGuessCount = Integer.parseInt(argValue); else if (argName.equals("suffix")) suffix = argValue; else if (argName.equals("includeDistanceFScores")) includeDistanceFScores = argValue.equalsIgnoreCase("true"); else if (argName.equals("evaluationFile")) evaluationFilePath = argValue; else if (argName.equals("labeledEvaluation")) labeledEvaluation = argValue.equalsIgnoreCase("true"); else if (argName.equals("tokeniserBeamWidth")) tokeniserBeamWidth = Integer.parseInt(argValue); else if (argName.equals("posTaggerBeamWidth")) posTaggerBeamWidth = Integer.parseInt(argValue); else if (argName.equals("parserBeamWidth")) parserBeamWidth = Integer.parseInt(argValue); else if (argName.equals("propagateTokeniserBeam")) propagateTokeniserBeam = argValue.equalsIgnoreCase("true"); else if (argName.equals("blockSize")) blockSize = Integer.parseInt(argValue); else if (argName.equals("crossValidationSize")) crossValidationSize = Integer.parseInt(argValue); else if (argName.equals("includeIndex")) includeIndex = Integer.parseInt(argValue); else if (argName.equals("excludeIndex")) excludeIndex = Integer.parseInt(argValue); else if (argName.equals("dynamiseFeatures")) dynamiseFeatures = argValue.equalsIgnoreCase("true"); else if (argName.equals("predictTransitions")) predictTransitions = argValue.equalsIgnoreCase("true"); else if (argName.equals("lexiconDir")) { if (argValue.startsWith("replace:")) { replaceLexicon = true; lexiconDirPath = argValue.substring("replace:".length()); } else { lexiconDirPath = argValue; } } else if (argName.equals("perceptronScoring")) { PerceptronScoring perceptronScoring = PerceptronScoring.valueOf(argValue); MachineLearningSession.setPerceptronScoring(perceptronScoring); } else if (argName.equals("parseComparisonStrategy")) { parseComparisonStrategyType = ParseComparisonStrategyType.valueOf(argValue); } else if (argName.equals("sentenceReader")) { sentenceReaderPath = argValue; } else if (argName.equals("skipLabel")) { skipLabel = argValue; } else if (argName.equals("earlyStop")) { earlyStop = argValue.equalsIgnoreCase("true"); } else if (argName.equals("sentenceFeatures")) { sentenceFeaturePath = argValue; } else if (argName.equals("tokeniserFeatures")) { tokeniserFeaturePath = argValue; } else if (argName.equals("tokeniserPatterns")) { tokeniserPatternFilePath = argValue; } else if (argName.equals("posTaggerFeatures")) { posTaggerFeaturePath = argValue; } else if (argName.equals("parserFeatures")) { parserFeaturePath = argValue; } else if (argName.equals("externalResources")) { externalResourcePath = argValue; } else if (argName.equals("testWords")) { String[] parts = argValue.split(";"); testWords = new HashSet<String>(); for (String part : parts) testWords.add(part); } else if (argName.equals("includeLexiconCoverage")) { includeLexiconCoverage = argValue.equalsIgnoreCase("true"); } else if (argName.equals("iterations")) iterations = Integer.parseInt(argValue); else if (argName.equals("cutoff")) cutoff = Integer.parseInt(argValue); else if (argName.equals("dependencyLabels")) dependencyLabelPath = argValue; else if (argName.equals("parsingConstrainer")) parsingConstrainerPath = argValue; else if 
(argName.equals("algorithm")) algorithm = MachineLearningAlgorithm.valueOf(argValue); else if (argName.equals("linearSVMSolver")) solverType = LinearSVMSolverType.valueOf(argValue); else if (argName.equals("linearSVMCost")) constraintViolationCost = Double.parseDouble(argValue); else if (argName.equals("linearSVMEpsilon")) epsilon = Double.parseDouble(argValue); else if (argName.equals("perceptronTolerance")) perceptronTolerance = Double.parseDouble(argValue); else if (argName.equals("averageAtIntervals")) averageAtIntervals = argValue.equalsIgnoreCase("true"); else if (argName.equals("perceptronObservationPoints")) { String[] points = argValue.split(","); perceptronObservationPoints = new ArrayList<Integer>(); for (String point : points) perceptronObservationPoints.add(Integer.parseInt(point)); } else if (argName.equals("patternTokeniser")) patternTokeniserType = PatternTokeniserType.valueOf(argValue); else if (argName.equals("excludeFile")) { excludeFileName = argValue; } else { System.out.println("Unknown argument: " + argName); throw new RuntimeException("Unknown argument: " + argName); } } if (command == null) throw new TalismaneException("No command provided."); if (command.equals(Command.evaluate)) { if (outDirPath.length() == 0) throw new RuntimeException("Missing argument: outdir"); } if (startModule == null) startModule = module; if (startModule == null) startModule = Module.SentenceDetector; if (endModule == null) endModule = module; if (endModule == null) endModule = Module.Parser; if (module == null) module = endModule; if (command == Command.train) { this.predictTransitions = true; } if (builtInTemplate != null) { if (builtInTemplate.equalsIgnoreCase("with_location")) { tokeniserTemplateName = "tokeniser_template_with_location.ftl"; posTaggerTemplateName = "posTagger_template_with_location.ftl"; parserTemplateName = "parser_conll_template_with_location.ftl"; } else if (builtInTemplate.equalsIgnoreCase("with_prob")) { tokeniserTemplateName = "tokeniser_template_with_prob.ftl"; posTaggerTemplateName = "posTagger_template_with_prob.ftl"; parserTemplateName = "parser_conll_template_with_prob.ftl"; } else if (builtInTemplate.equalsIgnoreCase("with_comments")) { posTaggerTemplateName = "posTagger_template_with_comments.ftl"; parserTemplateName = "parser_conll_template_with_comments.ftl"; } else { throw new TalismaneException("Unknown builtInTemplate: " + builtInTemplate); } } if (posTaggerBeamWidth < 0) posTaggerBeamWidth = beamWidth; if (parserBeamWidth < 0) parserBeamWidth = beamWidth; inputCharset = Charset.defaultCharset(); outputCharset = Charset.defaultCharset(); if (encoding != null) { inputCharset = Charset.forName(encoding); outputCharset = Charset.forName(encoding); } else { if (inputEncoding != null) inputCharset = Charset.forName(inputEncoding); if (outputEncoding != null) outputCharset = Charset.forName(outputEncoding); } if (fileName == null && inFilePath != null) { fileName = inFilePath; } if (posTagSetPath != null) { File posTagSetFile = new File(posTagSetPath); Scanner posTagSetScanner = new Scanner(new BufferedReader( new InputStreamReader(new FileInputStream(posTagSetFile), this.getInputCharset().name()))); PosTagSet posTagSet = this.getPosTaggerService().getPosTagSet(posTagSetScanner); TalismaneSession.setPosTagSet(posTagSet); } if (transitionSystemStr != null) { TransitionSystem transitionSystem = null; if (transitionSystemStr.equalsIgnoreCase("ShiftReduce")) { transitionSystem = this.getParserService().getShiftReduceTransitionSystem(); } else if 
(transitionSystemStr.equalsIgnoreCase("ArcEager")) { transitionSystem = this.getParserService().getArcEagerTransitionSystem(); } else { throw new TalismaneException("Unknown transition system: " + transitionSystemStr); } if (dependencyLabelPath != null) { File dependencyLabelFile = new File(dependencyLabelPath); Scanner depLabelScanner = new Scanner(new BufferedReader( new InputStreamReader(new FileInputStream(dependencyLabelFile), "UTF-8"))); List<String> dependencyLabels = new ArrayList<String>(); while (depLabelScanner.hasNextLine()) { String dependencyLabel = depLabelScanner.nextLine(); if (!dependencyLabel.startsWith("#")) dependencyLabels.add(dependencyLabel); } transitionSystem.setDependencyLabels(dependencyLabels); } TalismaneSession.setTransitionSystem(transitionSystem); } if (this.lexiconDirPath != null) { PosTaggerLexicon lexicon = null; LexiconChain lexiconChain = null; if (replaceLexicon) { lexiconChain = new LexiconChain(); } else { lexicon = this.implementation.getDefaultLexicon(); if (lexicon instanceof LexiconChain) { lexiconChain = (LexiconChain) lexicon; } else { lexiconChain = new LexiconChain(); lexiconChain.addLexicon(lexicon); } } File lexiconDir = new File(lexiconDirPath); LexiconDeserializer lexiconDeserializer = new LexiconDeserializer(); List<PosTaggerLexicon> lexicons = lexiconDeserializer.deserializeLexicons(lexiconDir); for (PosTaggerLexicon oneLexicon : lexicons) { lexiconChain.addLexicon(oneLexicon); } lexicon = lexiconChain; TalismaneSession.setLexicon(lexicon); } if (externalResourcePath != null) { externalResourceFinder = this.getMachineLearningService().getExternalResourceFinder(); File externalResourceFile = new File(externalResourcePath); externalResourceFinder.addExternalResources(externalResourceFile); ExternalResourceFinder parserResourceFinder = this.getParserFeatureService() .getExternalResourceFinder(); ExternalResourceFinder posTaggerResourceFinder = this.getPosTaggerFeatureService() .getExternalResourceFinder(); ExternalResourceFinder tokeniserResourceFinder = this.getTokenFeatureService() .getExternalResourceFinder(); ExternalResourceFinder sentenceResourceFinder = this.getSentenceDetectorFeatureService() .getExternalResourceFinder(); for (ExternalResource<?> externalResource : externalResourceFinder.getExternalResources()) { parserResourceFinder.addExternalResource(externalResource); posTaggerResourceFinder.addExternalResource(externalResource); tokeniserResourceFinder.addExternalResource(externalResource); sentenceResourceFinder.addExternalResource(externalResource); } } }
From source file:com.joliciel.talismane.TalismaneConfig.java
/**
 * TokenFilters to be applied during analysis.
 * @return
 */
public List<TokenFilter> getTokenFilters() {
    try {
        if (tokenFilters == null) {
            List<String> tokenFilterDescriptors = new ArrayList<String>();
            tokenFilters = new ArrayList<TokenFilter>();
            for (int i = 0; i <= 1; i++) {
                LOG.debug("Token filters");
                Scanner tokenFilterScanner = null;
                if (i == 0) {
                    if (tokenFiltersPath != null && tokenFiltersPath.length() > 0) {
                        LOG.debug("From: " + tokenFiltersPath);
                        File tokenFilterFile = new File(tokenFiltersPath);
                        tokenFilterScanner = new Scanner(tokenFilterFile);
                    }
                } else {
                    LOG.debug("From default");
                    tokenFilterScanner = this.implementation.getDefaultTokenFiltersScanner();
                }
                if (tokenFilterScanner != null) {
                    while (tokenFilterScanner.hasNextLine()) {
                        String descriptor = tokenFilterScanner.nextLine();
                        LOG.debug(descriptor);
                        tokenFilterDescriptors.add(descriptor);
                        if (descriptor.length() > 0 && !descriptor.startsWith("#")) {
                            TokenFilter tokenFilter = this.getTokenFilterService().getTokenFilter(descriptor);
                            tokenFilters.add(tokenFilter);
                        }
                    }
                }
            }
            this.getDescriptors().put(TokenFilterService.TOKEN_FILTER_DESCRIPTOR_KEY, tokenFilterDescriptors);
        }
        return tokenFilters;
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
@Override
public TokeniserPatternManager getTokeniserPatternManager() {
    if (tokeniserPatternManager == null) {
        if (tokeniserPatternFilePath.length() == 0)
            throw new RuntimeException("Missing argument: tokeniserPatterns");
        try {
            File tokeniserPatternFile = this.getFile(tokeniserPatternFilePath);
            Scanner scanner = new Scanner(new BufferedReader(
                    new InputStreamReader(new FileInputStream(tokeniserPatternFile), this.getInputCharset())));
            List<String> patternDescriptors = new ArrayListNoNulls<String>();
            while (scanner.hasNextLine()) {
                String descriptor = scanner.nextLine();
                patternDescriptors.add(descriptor);
                LOG.debug(descriptor);
            }
            scanner.close();
            this.getDescriptors().put(TokeniserPatternService.PATTERN_DESCRIPTOR_KEY, patternDescriptors);
            tokeniserPatternManager = this.getTokeniserPatternService().getPatternManager(patternDescriptors);
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return tokeniserPatternManager;
}
From source file:com.joliciel.talismane.TalismaneConfigImpl.java
@Override
public Set<ParseConfigurationFeature<?>> getParserFeatures() {
    if (parserFeatures == null) {
        try {
            if (parserFeaturePath != null) {
                LOG.debug("Found setting to change parser features");
                File parserFeatureFile = this.getFile(parserFeaturePath);
                Scanner scanner = new Scanner(new BufferedReader(
                        new InputStreamReader(new FileInputStream(parserFeatureFile), this.getInputCharset())));
                List<String> featureDescriptors = new ArrayListNoNulls<String>();
                while (scanner.hasNextLine()) {
                    String descriptor = scanner.nextLine();
                    featureDescriptors.add(descriptor);
                    LOG.debug(descriptor);
                }
                parserFeatures = this.getParserFeatureService().getFeatures(featureDescriptors);
                this.getDescriptors().put(MachineLearningModel.FEATURE_DESCRIPTOR_KEY, featureDescriptors);
            }
        } catch (Exception e) {
            LogUtils.logError(LOG, e);
            throw new RuntimeException(e);
        }
    }
    return parserFeatures;
}