Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package flight_ranker;

/**
 *
 * @author Aniket
 */
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;

import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

/** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
 *  being tagged by the tagger. The sentences are generated by direct use
 *  of the DocumentPreprocessor class.
 *
 *  @author Christopher Manning
 */
class TaggerDemo2 {

  /** Not instantiable: this class only hosts the {@link #main(String[])} entry point. */
  private TaggerDemo2() { }

  /**
   * Tags every sentence of a UTF-8 text file and prints the tagged sentences to
   * standard output, then tags one hard-coded sentence and prints each of its
   * words whose tag starts with {@code "JJ"}.
   *
   * <p>If exactly two arguments are not supplied, a usage message is written to
   * standard error and the method returns without doing any work.
   *
   * @param args {@code args[0]} is passed to the {@link MaxentTagger} constructor
   *             (the model file); {@code args[1]} is the path of the file to tag
   * @throws Exception if the input file cannot be opened or read, or if the
   *                   tagger or tokenizer setup fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
      return;
    }

    MaxentTagger tagger = new MaxentTagger(args[0]);
    // The option string is handed to the PTB tokenizer unchanged.
    TokenizerFactory<CoreLabel> ptbTokenizerFactory =
        PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");

    // try-with-resources closes the input reader and flushes/closes the writer even
    // when tagging throws. Closing the writer also closes System.out, exactly as the
    // explicit pw.close() at the end of the original code did.
    try (BufferedReader r =
             new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
         PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {

      DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
      documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);

      // Each element yielded by the preprocessor is one sentence as a list of words.
      for (List<HasWord> sentence : documentPreprocessor) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        // NOTE(review): the 'false' flag presumably keeps the tags in the printed
        // output rather than printing bare words - confirm against the CoreNLP docs.
        pw.println(Sentence.listToString(tSentence, false));
      }

      // print the adjectives in one more sentence.
      // This shows how to get at words and tags in a tagged sentence.
      List<HasWord> sent = Sentence.toWordList(
          "The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
      List<TaggedWord> taggedSent = tagger.tagSentence(sent);
      for (TaggedWord tw : taggedSent) {
        if (tw.tag().startsWith("JJ")) {
          pw.println(tw.word());
        }
      }
    }
  }

}