List of usage examples for edu.stanford.nlp.tagger.maxent.MaxentTagger#tagSentence
public List<TaggedWord> tagSentence(List<? extends HasWord> sentence)
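Before the project listings below, a minimal self-contained sketch of the call itself: build a pre-tokenized sentence as a List of HasWord, tag it, and read the results through word() and tag(). The model path is the stock English model that several examples below also use; substitute whatever tagger model you actually have.

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

import java.util.ArrayList;
import java.util.List;

public class TagSentenceDemo {
    public static void main(String[] args) {
        // Stock English model; any MaxentTagger model file works here.
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        // Pre-tokenized input: tagSentence accepts any List<? extends HasWord>
        List<HasWord> sentence = new ArrayList<>();
        for (String token : new String[] { "The", "quick", "brown", "fox", "jumps", "." }) {
            sentence.add(new Word(token));
        }
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        for (TaggedWord tw : tagged) {
            System.out.println(tw.word() + "/" + tw.tag()); // e.g. "quick/JJ"
        }
    }
}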
From source file: ie.pars.bnc.preprocess.ProcessNLP.java
License: Open Source License

private static StringBuilder parseTheSentence(String sentence, Morphology morphology, MaxentTagger posTagger,
        ParserGrammar parser, String sid) {
    TokenizerFactory<Word> newTokenizerFactory = PTBTokenizerFactory.newTokenizerFactory();
    // TokenizerFactory<WordLemmaTag> tokenizerFactory;
    // TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    // TokenizerFactory<Word> factory1 = PTBTokenizer.factory();
    StringBuilder results = new StringBuilder();
    results.append("<s id='" + sid + "'>\n");

    StringReader sr = new StringReader(sentence);
    Tokenizer<Word> tokenizer = newTokenizerFactory.getTokenizer(sr);
    List<Word> tokenize = tokenizer.tokenize();

    List<TaggedWord> tagSentence = posTagger.tagSentence(tokenize);

    Tree parseTree = parser.parse(tagSentence);
    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());
    Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
    SemanticGraph depTree = new SemanticGraph(deps);

    for (int i = 0; i < tagSentence.size(); ++i) {
        int head = -1;
        String deprel = null;
        // if (depTree != null) {
        Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());
        IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
        if (node != null) {
            List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
            if (!edgeList.isEmpty()) {
                assert edgeList.size() == 1;
                head = edgeList.get(0).getGovernor().index();
                deprel = edgeList.get(0).getRelation().toString();
            } else if (rootSet.contains(i + 1)) {
                head = 0;
                deprel = "ROOT";
            }
        }
        // }
        // Write the token
        TaggedWord lexHead = null;
        if (head > 0) {
            lexHead = tagSentence.get(head - 1);
        }
        results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n");
    }
    results.append("</s>\n");
    return results;
}
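The example above chains tagSentence into a constituency parse: the tagger's output is handed directly to ParserGrammar.parse, so the parser reuses the POS tags instead of predicting its own. A stripped-down sketch of that hand-off, assuming the stock English tagger and PCFG parser models (the paths here are the usual distribution defaults, not taken from the source above):

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.Tree;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class TagThenParse {
    public static void main(String[] args) {
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        LexicalizedParser parser = LexicalizedParser.loadModel(
                "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        List<Word> tokens = Arrays.stream("This is a test .".split(" "))
                .map(Word::new).collect(Collectors.toList());
        // tagSentence supplies the POS tags the parser can reuse
        List<TaggedWord> tagged = tagger.tagSentence(tokens);
        Tree tree = parser.parse(tagged);
        tree.pennPrint();
    }
}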
From source file: net.sourceforge.doddle_owl.ui.InputDocumentSelectionPanel.java
License: Open Source License

private String runStanfordParser(File docFile) {
    File dir = new File(STANFORD_PARSER_MODELS_HOME);
    if (!dir.exists()) {
        dir.mkdir();
    }
    BufferedWriter bw = null;
    StringBuilder builder = new StringBuilder();
    try {
        String modelName = "english-left3words-distsim.tagger";
        String modelPath = STANFORD_PARSER_MODELS_HOME + File.separator + modelName;
        File modelFile = new File(modelPath);
        if (!modelFile.exists()) {
            URL url = DODDLE_OWL.class.getClassLoader()
                    .getResource(Utils.RESOURCE_DIR + "stanford_parser_models/" + modelName);
            if (url != null) {
                FileUtils.copyURLToFile(url, modelFile);
                // System.out.println("copy: " + modelFile.getAbsolutePath());
            }
        }
        bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(STANFORD_PARSER_MODELS_HOME + File.separator + "tmpTagger.txt"), "UTF-8"));
        MaxentTagger tagger = new MaxentTagger(modelFile.getAbsolutePath());
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(docFile)));
        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            bw.write(Sentence.listToString(tSentence, false));
            builder.append(Sentence.listToString(tSentence, false));
        }
        bw.close();
    } catch (IOException ioe) {
        DODDLE_OWL.getLogger().log(Level.DEBUG, "Stanford Parser can not be executed.");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (bw != null) {
                bw.close();
            }
        } catch (IOException ioe2) {
            ioe2.printStackTrace();
        }
    }
    return builder.toString();
}
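Setup and teardown aside, the method above is the standard file-tagging loop: MaxentTagger.tokenizeText splits the document into sentences, tagSentence tags each one, and Sentence.listToString renders the result. A minimal sketch with the paths left as parameters:

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;

public class FileTagger {
    public static String tagFile(String path, String modelPath) throws IOException {
        MaxentTagger tagger = new MaxentTagger(modelPath);
        StringBuilder out = new StringBuilder();
        List<List<HasWord>> sentences =
                MaxentTagger.tokenizeText(new BufferedReader(new FileReader(path)));
        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            // false => render each token's full "word/TAG" form rather than just the word
            out.append(Sentence.listToString(tagged, false)).append('\n');
        }
        return out.toString();
    }
}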
From source file: net.stargraph.core.impl.corenlp.CoreNLPAnnotator.java
License: Open Source License

@Override
protected List<Word> doRun(Language language, String sentence) {
    MaxentTagger tagger = taggers.computeIfAbsent(language, lang -> {
        if (lang == EN) {
            return new MaxentTagger(
                    "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        }
        throw new UnsupportedLanguageException(lang);
    });

    PartOfSpeechSet partOfSpeechSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> words = new ArrayList<>();
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(sentence));
    sentences.forEach(s -> {
        tagger.tagSentence(s).forEach(taggedWord -> words
                .add(new Word(partOfSpeechSet.valueOf(taggedWord.tag()), taggedWord.value())));
    });

    return words;
}
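The same tokenize-then-tag pattern works outside the annotator; a minimal sketch that tags an in-memory string and prints each token's value() and tag(), assuming the same English model path as above:

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

import java.io.StringReader;
import java.util.List;

public class TokenizeAndTag {
    public static void main(String[] args) {
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        // tokenizeText handles sentence splitting as well as tokenization
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader("Dogs bark. Cats purr."));
        for (List<HasWord> sentence : sentences) {
            tagger.tagSentence(sentence).forEach(tw ->
                    System.out.println(tw.value() + " -> " + tw.tag()));
        }
    }
}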
From source file: nlp.main.NER_Main.java
License: Open Source License

public static ArrayList createTestData(String data) {
    try {
        File file_word_2POS = new File("resources/POS.txt");
        File onlyWords = new File("resources/words.txt");
        PrintWriter pw = new PrintWriter(file_word_2POS);
        PrintWriter pw1 = new PrintWriter(onlyWords);
        ArrayList<String> testData = new ArrayList<>();
        MaxentTagger tagger;
        tagger = new MaxentTagger("Models/tamil.tagger");
        List<List<String[]>> morphed = MainNounDetetectionLayer.getMorph(data);
        Double p = (1.0 / morphed.size()) * 25;
        List<List<HasWord>> sentences = new ArrayList<>();
        morphed.stream().map((List<String[]> sent) -> {
            List<HasWord> newList = new ArrayList<>();
            sent.stream().forEach((s) -> {
                newList.add(new Word(s[0]));
            });
            return newList;
        }).map((newList) -> {
            sentences.add(newList);
            return newList;
        }).forEach((_item) -> {
        });
        boolean last = false;
        int j = 0;
        for (List<HasWord> sentence : sentences) {
            boolean skip = false;
            int size = sentence.size();
            int index = 0;
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            for (TaggedWord word : tSentence) {
                String s = String.valueOf(word);
                String[] y = s.split("/");
                s = s.replace("/", "\t");
                pw.write(s + "\t" + morphed.get(j).get(index)[1] + "\n");
                pw1.write(y[0] + "\n");
                index++;
            }
            pw.write("\n");
            pw1.write("\n");
            setProgress(p);
            j++;
        }
        pw1.close();
        pw.close();
        RuleBaseTagger.getTestData("resources/words.txt");
        Process proc = Runtime.getRuntime().exec("scripts/./test.sh");
        // Read the output
        BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream()));
        String line = "";
        PrintWriter testDataSet = new PrintWriter(new File("resources/test_data.txt"));
        while ((line = reader.readLine()) != null) {
            testData.add(line);
            testDataSet.write(line + "\n");
        }
        testDataSet.close();
        int waitFor = proc.waitFor();
        return testData;
    } catch (IOException | InterruptedException ex) {
        Logger.getLogger(NER_Main.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
From source file: nlp.main.NER_Main.java
License: Open Source License

public static ArrayList createTestDataWithPrefix(String data, int i) {
    try {
        File file_word_2POS = new File("resources/POS.txt");
        File onlyWords = new File("resources/words.txt");
        PrintWriter pw = new PrintWriter(file_word_2POS);
        PrintWriter pw1 = new PrintWriter(onlyWords);
        ArrayList<String> testData = new ArrayList<>();
        MaxentTagger tagger;
        tagger = new MaxentTagger("Models/tamil.tagger");
        List<List<String[]>> morphed = MainNounDetetectionLayer.getMorph(data);
        Double p = (1.0 / morphed.size()) * 25;
        List<List<HasWord>> sentences = new ArrayList<>();
        morphed.stream().map((List<String[]> sent) -> {
            List<HasWord> newList = new ArrayList<>();
            sent.stream().forEach((s) -> {
                newList.add(new Word(s[0]));
            });
            return newList;
        }).map((newList) -> {
            sentences.add(newList);
            return newList;
        }).forEach((_item) -> {
        });
        boolean last = false;
        int j = 0;
        for (List<HasWord> sentence : sentences) {
            boolean skip = false;
            int size = sentence.size();
            int index = 0;
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            for (TaggedWord word : tSentence) {
                String s = String.valueOf(word);
                String[] y = s.split("/");
                s = s.replace("/", "\t");
                pw.write(s + "\t" + morphed.get(j).get(index)[1] + "\n");
                pw1.write(y[0] + "\n");
                index++;
            }
            pw.write("\n");
            pw1.write("\n");
            setProgress(p);
            j++;
        }
        pw1.close();
        pw.close();
        RuleBaseTagger.getTestData("resources/words.txt");
        if (i == 0)
            PrefixFeatureCreation.prefixGeneration("resources/words.txt", 4);
        else
            PrefixFeatureCreation.prefixGeneration("resources/words.txt", 4); // NOTE: both branches are identical in the original source
        Process proc = Runtime.getRuntime().exec("scripts/./test_with_prefix.sh");
        // Read the output
        BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream()));
        String line = "";
        PrintWriter testDataSet = new PrintWriter(new File("resources/test_data.txt"));
        while ((line = reader.readLine()) != null) {
            testData.add(line);
            testDataSet.write(line + "\n");
        }
        testDataSet.close();
        int waitFor = proc.waitFor();
        return testData;
    } catch (IOException | InterruptedException ex) {
        Logger.getLogger(NER_Main.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
From source file: nlp.pos.POSTagger.java
License: Open Source License

public static void postag(String fileName) {
    try {
        MaxentTagger tagger;
        tagger = new MaxentTagger("Models/tamil.tagger");
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(fileName)));
        try (PrintWriter writer = new PrintWriter("POSTagged_" + fileName, "UTF-8")) {
            sentences.stream().map((sentence) -> tagger.tagSentence(sentence))
                    .map((List<TaggedWord> tSentence) -> {
                        tSentence.stream().map((word) -> String.valueOf(word)).map((s) -> s.split("/"))
                                .map((y) -> {
                                    writer.println(y[0] + "\t" + y[1]);
                                    return y;
                                }).forEach((y) -> {
                                    System.out.println(y[0] + "\t" + y[1]);
                                });
                        return tSentence;
                    }).map((_item) -> {
                        System.out.println();
                        return _item;
                    }).forEach((_item) -> {
                        writer.println();
                    });
        }
    } catch (FileNotFoundException | UnsupportedEncodingException ex) {
        Logger.getLogger(POSTagger.class.getName()).log(Level.SEVERE, null, ex);
    }
}
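One caveat about the example above: it recovers the word and tag by splitting String.valueOf(word) on "/", which breaks on tokens that themselves contain a slash. TaggedWord exposes word() and tag() directly; a sketch of the same loop using those accessors (same model path as above, kept as an assumption):

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;

public class SafePosTag {
    public static void postag(String fileName) throws IOException {
        MaxentTagger tagger = new MaxentTagger("Models/tamil.tagger"); // path from the example above
        List<List<HasWord>> sentences =
                MaxentTagger.tokenizeText(new BufferedReader(new FileReader(fileName)));
        try (PrintWriter writer = new PrintWriter("POSTagged_" + fileName, "UTF-8")) {
            for (List<HasWord> sentence : sentences) {
                for (TaggedWord word : tagger.tagSentence(sentence)) {
                    // word() and tag() avoid re-splitting the "word/TAG" string
                    writer.println(word.word() + "\t" + word.tag());
                }
                writer.println(); // blank line between sentences
            }
        }
    }
}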
From source file: pubmedsearch.PubMedSearch.java

public static List<TaggedWord> tagging(MaxentTagger tagger, List<List<HasWord>> sentences) {
    // Note: tags only the first sentence in the list
    List<HasWord> sentence = sentences.get(0);
    return tagger.tagSentence(sentence);
}
From source file: RestServices.GetTargetedSentimentResource.java

/**
 * Retrieves representation of an instance of RestServices.GetTargetedSentimentResource
 * @return an instance of java.lang.String
 */
@GET
@Produces("application/json")
public String getJson(@QueryParam("data") String datas) {
    System.out.println("Working Directory = " + System.getProperty("user.dir"));
    JSONObject objOuter = new JSONObject();
    try {
        JSONObject inputJsonObject = new JSONObject(datas);
        String targetPhrase = inputJsonObject.getString("target");
        String contextText = inputJsonObject.getString("data");
        String modelPath = DependencyParser.DEFAULT_MODEL;
        MaxentTagger tagger = new MaxentTagger(GlobalVarsStore.taggerPath);
        DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(contextText));
        ArrayList<TypedDependency> td = new ArrayList<TypedDependency>();
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            GrammaticalStructure gs = parser.predict(tagged);
            td.addAll(gs.typedDependencies());
        }
        BaselineAnalysisTools bat = new BaselineAnalysisTools();
        bat.prepareTools();
        String[] bag = clear(contextText).split("(?:(?:[^a-zA-Z]+')|(?:'[^a-zA-Z]+))|(?:[^a-zA-Z']+)");
        double sent = 0.0;
        ArrayList<Double> weights = new ArrayList<Double>();
        int pos = 0;
        int neg = 0;
        int neu = 0;
        double confidenceContainment = 0.0;
        double confidenceClassification = 0.0;
        for (int i = 0; i < bag.length; i++) {
            Double res = GlobalVarsStore.lex.testWord(bag[i]);
            if (res != null) {
                weights.add(res);
                confidenceContainment++;
            }
        }
        Double totalSent = 0.0;
        for (int i = 0; i < weights.size(); i++) {
            totalSent += weights.get(i);
            if (weights.get(i) > 0) {
                pos++;
            } else if (weights.get(i) < 0) {
                neg++;
            } else {
                neu++;
            }
        }
        if (weights.size() > 0) {
            sent = totalSent / weights.size();
            confidenceContainment = confidenceContainment / bag.length;
            if (sent > 0) {
                confidenceClassification = pos / (double) (pos + neg + neu);
            } else if (sent < 0) {
                confidenceClassification = neg / (double) (pos + neg + neu);
            } else {
                confidenceClassification = neu / (double) (pos + neg + neu);
            }
        }
        String[] targets;
        if (targetPhrase.contains(" ")) {
            targets = targetPhrase.split(" ");
        } else {
            targets = new String[1];
            targets[0] = targetPhrase;
        }
        double tempSent = 0;
        double tSent = 0;
        double tSize = 0;
        double sentMod = 1;
        for (int i = 0; i < targets.length; i++) {
            for (int j = 0; j < td.size(); j++) {
                String secondLevel = null;
                double secondSentMod = 1;
                if (targets[i].equals(td.get(j).gov().value())) {
                    if (td.get(j).reln().getShortName().equals("neg")) {
                        sentMod = -1;
                    } else {
                        if (td.get(j).reln().getShortName().equals("dobj")) {
                            secondLevel = td.get(j).dep().value();
                        }
                        tSize++;
                        tempSent = GlobalVarsStore.lex.testWord(td.get(j).dep().value());
                    }
                } else if (targets[i].equals(td.get(j).dep().value())) {
                    if (td.get(j).reln().getShortName().equals("neg")) {
                        sentMod = -1;
                    } else {
                        if (td.get(j).reln().getShortName().equals("dobj")) {
                            secondLevel = td.get(j).gov().value();
                        }
                        tSize++;
                        tempSent = GlobalVarsStore.lex.testWord(td.get(j).gov().value());
                    }
                }
                if (secondLevel != null) {
                    for (int k = 0; k < td.size(); k++) {
                        if (!targets[i].equals(td.get(k).dep().value())
                                && secondLevel.equals(td.get(k).gov().value())) {
                            if (td.get(k).reln().getShortName().equals("neg")) {
                                secondSentMod = -1;
                            }
                        } else if (!targets[i].equals(td.get(k).gov().value())
                                && secondLevel.equals(td.get(k).dep().value())) {
                            if (td.get(k).reln().getShortName().equals("neg")) {
                                secondSentMod = -1;
                            }
                        }
                    }
                }
                tSent += tempSent * secondSentMod;
            }
        }
        if (tSize > 0) {
            tSent /= tSize;
        }
        if (tSent == 0) {
            tSent = sent * sentMod;
        }
        objOuter.put("SOS", tSent);
        objOuter.put("CONFIDENCE", (confidenceClassification * (1 - GlobalVarsStore.containmentConfidenceWeight))
                + (confidenceContainment * GlobalVarsStore.containmentConfidenceWeight));
    } catch (JSONException ex) {
        Logger.getLogger(GetBatchSentimentResource.class.getName()).log(Level.SEVERE, null, ex);
    }
    return objOuter.toString();
}
From source file: RestServices.Scoresservice.java

public static void main(String[] args) throws ParseException {
    /*
    String scenario = "transportation";
    GlobalVarsStore.lexicon = "wordnet";
    int objective = 6;
    // Reading source file
    CrawlersConnector ccn = new CrawlersConnector();
    ArrayList<CustomStatus> scenarioTweets = null;
    try {
        scenarioTweets = ccn.readScenario(scenario);
    } catch (IOException ex) {
        System.out.println("<xml><result>Error: " + ex.getMessage() + "</result></xml>");
    } catch (JSONException ex) {
        System.out.println("<xml><result>Error: " + ex.getMessage() + "</result></xml>");
    }
    System.out.println("Parsed " + scenarioTweets.size() + " documents.");
    ArrayList<String> objectiveNames = new ArrayList<String>();
    objectiveNames.add("Change in Level of Service");
    objectiveNames.add("% change of Accidents cost");
    objectiveNames.add("% change of Air pollution (external) cost");
    objectiveNames.add("% change of Noise (external) cost");
    objectiveNames.add("User convenience in using the RP system");
    objectiveNames.add("Availability of alternative routes and modes");
    // Calculating SOF
    DecimalFormat df = new DecimalFormat("#.####");
    ArrayList<String> keys = null;
    if (scenario.equalsIgnoreCase("transportation")) {
        if (objective == 1) { keys = ccn.transportKeywords1; }
        else if (objective == 2) { keys = ccn.transportKeywords2; }
        else if (objective == 3) { keys = ccn.transportKeywords3; }
        else if (objective == 4) { keys = ccn.transportKeywords4; }
        else if (objective == 5) { keys = ccn.transportKeywords5; }
        else if (objective == 6) { keys = ccn.transportKeywords6; }
    } else if (scenario.equalsIgnoreCase("biofuel")) {
        if (objective == 1) { keys = ccn.biofuelKeywords1; }
        else if (objective == 2) { keys = ccn.biofuelKeywords2; }
        else if (objective == 3) { keys = ccn.biofuelKeywords3; }
        else if (objective == 4) { keys = ccn.biofuelKeywords4; }
    }
    System.out.println("Calculating Score Of Frequency...");
    TopicAnalysisTools tat = new TopicAnalysisTools();
    ArrayList<Double> sofs = new ArrayList<Double>();
    for (int i = 0; i < keys.size(); i++) {
        sofs.add(tat.countFrequency(scenarioTweets, keys.get(i)));
    }
    Double sof = 0.0;
    for (int i = 0; i < sofs.size(); i++) {
        sof += sofs.get(i);
    }
    if (sofs.size() > 0)
        sof = sof / sofs.size();
    System.out.println("Score of Frequency for objective '" + objectiveNames.get(objective - 1) + "' is " + df.format(sof));
    // Calculating SOS
    System.out.println("Calculating Score Of Sentiment...");
    BaselineAnalysisTools bat = new BaselineAnalysisTools();
    bat.prepareTools();
    ArrayList<CustomStatus> tweets = null;
    Double sos = 0.0;
    ArrayList<Double> soses = new ArrayList<Double>();
    for (int i = 0; i < keys.size(); i++) {
        try {
            tweets = ccn.readKeyword(keys.get(i));
            soses.add(bat.SentiWordNetMeanAnalysis(tweets, keys.get(i)));
        } catch (IOException ex) {
            ex.printStackTrace();
            soses.add(0.0);
        } catch (JSONException ex) {
            ex.printStackTrace();
            soses.add(0.0);
        }
    }
    for (int i = 0; i < soses.size(); i++) {
        sos += soses.get(i);
    }
    if (soses.size() > 0)
        sos = sos / soses.size();
    System.out.println("Score of Sentiment for objective '" + objectiveNames.get(objective - 1) + "' is " + df.format(sos));
    */

    // String tweetDate = "Thu Jul 23 00:00:00 CEST 2015";
    // DateFormat df = new SimpleDateFormat("EEE MMM dd kk:mm:ss z yyyy", Locale.ENGLISH);
    // Date result = df.parse(tweetDate);
    // tweetDate = (result.getYear() + 1900) + "-" + (result.getMonth() + 1) + "-" + result.getDate();
    // System.out.println(tweetDate);

    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "C:\\Users\\ViP\\Copy\\NTUA\\Code\\ConsensusPublicOpinion\\models\\english-left3words-distsim.tagger";
    for (int argIndex = 0; argIndex < args.length;) {
        switch (args[argIndex]) {
        case "-tagger":
            taggerPath = args[argIndex + 1];
            argIndex += 2;
            break;
        case "-model":
            modelPath = args[argIndex + 1];
            argIndex += 2;
            break;
        default:
            throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }
    String text = "I love apples and do not hate oranges";
    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);
        // Print typed dependencies
        System.out.println(gs);
        ArrayList<TypedDependency> cd = (ArrayList<TypedDependency>) gs.typedDependencies();
        for (int i = 0; i < cd.size(); i++) {
            System.out.println(String.format("%1$" + 10 + "s", cd.get(i).gov().value()) + "\t"
                    + String.format("%1$" + 10 + "s", cd.get(i).dep().value()) + "\t"
                    + cd.get(i).reln().getShortName() + "\t" + cd.get(i).reln().getLongName());
        }
    }
}
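The last two examples share the same core hand-off: the output of tagSentence goes straight into DependencyParser.predict. Stripped of the REST and scoring machinery, the pattern reduces to the sketch below, assuming the stock English tagger and the dependency parser's bundled default model:

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;

import java.io.StringReader;
import java.util.List;

public class TagThenDepParse {
    public static void main(String[] args) {
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        DependencyParser parser = DependencyParser.loadFromModelFile(DependencyParser.DEFAULT_MODEL);
        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader("I love apples."));
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence); // POS tags feed the parser
            GrammaticalStructure gs = parser.predict(tagged);
            for (TypedDependency dep : gs.typedDependencies()) {
                System.out.println(dep.reln() + "(" + dep.gov().value() + ", " + dep.dep().value() + ")");
            }
        }
    }
}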