Example usage for edu.stanford.nlp.tagger.maxent MaxentTagger tagSentence

List of usage examples for edu.stanford.nlp.tagger.maxent MaxentTagger tagSentence

Introduction

On this page you can find example usage of edu.stanford.nlp.tagger.maxent MaxentTagger.tagSentence.

Prototype

public List<TaggedWord> tagSentence(List<? extends HasWord> sentence) 

Source Link

Document

Returns a new Sentence that is a copy of the given sentence with all the words tagged with their part-of-speech.
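
For orientation, here is a minimal, self-contained sketch of the call (not taken from any of the projects below). It assumes the standard English left3words model bundled with Stanford CoreNLP is available on the classpath; substitute your own model path if it is not.

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

import java.io.StringReader;
import java.util.List;

public class TagSentenceDemo {
    public static void main(String[] args) {
        // Assumption: the English left3words model is on the classpath.
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");

        // tokenizeText splits raw text into sentences of HasWord tokens.
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(
                new StringReader("The quick brown fox jumps over the lazy dog."));

        for (List<HasWord> sentence : sentences) {
            // tagSentence returns a new list of TaggedWord; the input list is not modified.
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            System.out.println(Sentence.listToString(tagged, false));
        }
    }
}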

Usage

From source file: ie.pars.bnc.preprocess.ProcessNLP.java

License: Open Source License

private static StringBuilder parseTheSentence(String sentence, Morphology morphology, MaxentTagger posTagger,
        ParserGrammar parser, String sid) {
    TokenizerFactory<Word> newTokenizerFactory = PTBTokenizerFactory.newTokenizerFactory();
    //        TokenizerFactory<WordLemmaTag> tokenizerFactory;
    //        TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory() , "");
    //        TokenizerFactory<Word> factory1 = PTBTokenizer.factory();

    StringBuilder results = new StringBuilder();
    results.append("<s id='" + sid + "'>\n");

    StringReader sr = new StringReader(sentence);
    Tokenizer<Word> tokenizer = newTokenizerFactory.getTokenizer(sr);
    List<Word> tokenize = tokenizer.tokenize();

    List<TaggedWord> tagSentence = posTagger.tagSentence(tokenize);

    Tree parseTree = parser.parse(tagSentence);

    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());

    Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
    SemanticGraph depTree = new SemanticGraph(deps);

    for (int i = 0; i < tagSentence.size(); ++i) {

        int head = -1;
        String deprel = null;
        //                    if (depTree != null) {
        Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());
        IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
        if (node != null) {
            List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
            if (!edgeList.isEmpty()) {
                assert edgeList.size() == 1;
                head = edgeList.get(0).getGovernor().index();
                deprel = edgeList.get(0).getRelation().toString();
            } else if (rootSet.contains(i + 1)) {
                head = 0;
                deprel = "ROOT";
            }
        }
        //     }

        // Write the token
        TaggedWord lexHead = null;
        if (head > 0) {
            lexHead = tagSentence.get(head - 1);
        }
        results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n");
    }
    results.append("</s>\n");
    return results;
}

From source file: net.sourceforge.doddle_owl.ui.InputDocumentSelectionPanel.java

License: Open Source License

private String runStanfordParser(File docFile) {
    File dir = new File(STANFORD_PARSER_MODELS_HOME);
    if (!dir.exists()) {
        dir.mkdir();
    }
    BufferedWriter bw = null;
    StringBuilder builder = new StringBuilder();
    try {
        String modelName = "english-left3words-distsim.tagger";
        String modelPath = STANFORD_PARSER_MODELS_HOME + File.separator + modelName;
        File modelFile = new File(modelPath);
        if (!modelFile.exists()) {
            URL url = DODDLE_OWL.class.getClassLoader()
                    .getResource(Utils.RESOURCE_DIR + "stanford_parser_models/" + modelName);
            if (url != null) {
                FileUtils.copyURLToFile(url, modelFile);
                // System.out.println("copy: " +
                // modelFile.getAbsolutePath());
            }
        }
        bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(STANFORD_PARSER_MODELS_HOME + File.separator + "tmpTagger.txt"), "UTF-8"));
        MaxentTagger tagger = new MaxentTagger(modelFile.getAbsolutePath());
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(docFile)));
        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            bw.write(Sentence.listToString(tSentence, false));
            builder.append(Sentence.listToString(tSentence, false));
        }
        bw.close();
    } catch (IOException ioe) {
        DODDLE_OWL.getLogger().log(Level.DEBUG, "Stanford Parser can not be executed.");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (bw != null) {
                bw.close();
            }
        } catch (IOException ioe2) {
            ioe2.printStackTrace();
        }
    }
    return builder.toString();
}

From source file: net.stargraph.core.impl.corenlp.CoreNLPAnnotator.java

License: Open Source License

@Override
protected List<Word> doRun(Language language, String sentence) {
    MaxentTagger tagger = taggers.computeIfAbsent(language, lang -> {
        if (lang == EN) {
            return new MaxentTagger(
                    "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        }
        throw new UnsupportedLanguageException(lang);
    });

    PartOfSpeechSet partOfSpeechSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> words = new ArrayList<>();

    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(sentence));
    sentences.forEach(s -> {
        tagger.tagSentence(s).forEach(taggedWord -> words
                .add(new Word(partOfSpeechSet.valueOf(taggedWord.tag()), taggedWord.value())));
    });

    return words;
}

From source file: nlp.main.NER_Main.java

License: Open Source License

public static ArrayList createTestData(String data) {
    try {
        File file_word_2POS = new File("resources/POS.txt");
        File onlyWords = new File("resources/words.txt");
        PrintWriter pw = new PrintWriter(file_word_2POS);
        PrintWriter pw1 = new PrintWriter(onlyWords);
        ArrayList<String> testData = new ArrayList<>();
        MaxentTagger tagger;
        tagger = new MaxentTagger("Models/tamil.tagger");
        List<List<String[]>> morphed = MainNounDetetectionLayer.getMorph(data);
        Double p = (1.0 / morphed.size()) * 25;

        List<List<HasWord>> sentences = new ArrayList<>();

        morphed.stream().map((List<String[]> sent) -> {
            List<HasWord> newList = new ArrayList<>();
            sent.stream().forEach((s) -> {
                newList.add(new Word(s[0]));
            });
            return newList;
        }).map((newList) -> {
            sentences.add(newList);
            return newList;
        }).forEach((_item) -> {

        });

        boolean last = false;
        int j = 0;

        for (List<HasWord> sentence : sentences) {
            boolean skip = false;
            int size = sentence.size();
            int index = 0;
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            for (TaggedWord word : tSentence) {
                String s = String.valueOf(word);
                String[] y = s.split("/");
                s = s.replace("/", "\t");
                pw.write(s + "\t" + morphed.get(j).get(index)[1] + "\n");
                pw1.write(y[0] + "\n");
                index++;
            }
            pw.write("\n");
            pw1.write("\n");
            setProgress(p);
            j++;

        }
        pw1.close();
        pw.close();
        RuleBaseTagger.getTestData("resources/words.txt");

        Process proc = Runtime.getRuntime().exec("scripts/./test.sh");

        // Read the output
        BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream()));

        String line = "";
        PrintWriter testDataSet = new PrintWriter(new File("resources/test_data.txt"));
        while ((line = reader.readLine()) != null) {
            testData.add(line);
            testDataSet.write(line + "\n");
        }
        testDataSet.close();
        int waitFor = proc.waitFor();

        return testData;

    } catch (IOException | InterruptedException ex) {
        Logger.getLogger(NER_Main.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}

From source file: nlp.main.NER_Main.java

License: Open Source License

public static ArrayList createTestDataWithPrefix(String data, int i) {
    try {
        File file_word_2POS = new File("resources/POS.txt");
        File onlyWords = new File("resources/words.txt");
        PrintWriter pw = new PrintWriter(file_word_2POS);
        PrintWriter pw1 = new PrintWriter(onlyWords);
        ArrayList<String> testData = new ArrayList<>();
        MaxentTagger tagger;
        tagger = new MaxentTagger("Models/tamil.tagger");
        List<List<String[]>> morphed = MainNounDetetectionLayer.getMorph(data);
        Double p = (1.0 / morphed.size()) * 25;

        List<List<HasWord>> sentences = new ArrayList<>();

        morphed.stream().map((List<String[]> sent) -> {
            List<HasWord> newList = new ArrayList<>();
            sent.stream().forEach((s) -> {
                newList.add(new Word(s[0]));
            });
            return newList;
        }).map((newList) -> {
            sentences.add(newList);
            return newList;
        }).forEach((_item) -> {

        });

        boolean last = false;
        int j = 0;

        for (List<HasWord> sentence : sentences) {
            boolean skip = false;
            int size = sentence.size();
            int index = 0;
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            for (TaggedWord word : tSentence) {
                String s = String.valueOf(word);
                String[] y = s.split("/");
                s = s.replace("/", "\t");
                pw.write(s + "\t" + morphed.get(j).get(index)[1] + "\n");
                pw1.write(y[0] + "\n");
                index++;
            }
            pw.write("\n");
            pw1.write("\n");
            setProgress(p);
            j++;

        }
        pw1.close();
        pw.close();
        RuleBaseTagger.getTestData("resources/words.txt");
        if (i == 0)
            PrefixFeatureCreation.prefixGeneration("resources/words.txt", 4);
        else
            PrefixFeatureCreation.prefixGeneration("resources/words.txt", 4);

        Process proc = Runtime.getRuntime().exec("scripts/./test_with_prefix.sh");

        // Read the output
        BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream()));

        String line = "";
        PrintWriter testDataSet = new PrintWriter(new File("resources/test_data.txt"));
        while ((line = reader.readLine()) != null) {
            testData.add(line);
            testDataSet.write(line + "\n");
        }
        testDataSet.close();
        int waitFor = proc.waitFor();

        return testData;

    } catch (IOException | InterruptedException ex) {
        Logger.getLogger(NER_Main.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}

From source file: nlp.pos.POSTagger.java

License: Open Source License

public static void postag(String fileName) {

    try {
        MaxentTagger tagger;
        tagger = new MaxentTagger("Models/tamil.tagger");

        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(fileName)));
        try (PrintWriter writer = new PrintWriter("POSTagged_" + fileName, "UTF-8")) {
            sentences.stream().map((sentence) -> tagger.tagSentence(sentence))
                    .map((List<TaggedWord> tSentence) -> {
                        tSentence.stream().map((word) -> String.valueOf(word)).map((s) -> s.split("/"))
                                .map((y) -> {
                                    writer.println(y[0] + "\t" + y[1]);
                                    return y;
                                }).forEach((y) -> {
                                    System.out.println(y[0] + "\t" + y[1]);
                                });
                        return tSentence;
                    }).map((_item) -> {
                        System.out.println();
                        return _item;
                    }).forEach((_item) -> {
                        writer.println();
                    });
        }
    } catch (FileNotFoundException | UnsupportedEncodingException ex) {
        Logger.getLogger(POSTagger.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file: pubmedsearch.PubMedSearch.java

public static List<TaggedWord> tagging(MaxentTagger tagger, List<List<HasWord>> sentences) {
    List<HasWord> sentence = sentences.get(0);
    return tagger.tagSentence(sentence);
}

From source file: RestServices.GetTargetedSentimentResource.java

/**
 * Retrieves representation of an instance of RestServices.GetTargetedSentimentResource
 * @return an instance of java.lang.String
 */
@GET
@Produces("application/json")
public String getJson(@QueryParam("data") String datas) {
    System.out.println("Working Directory = " + System.getProperty("user.dir"));
    JSONObject objOuter = new JSONObject();
    try {
        JSONObject inputJsonObject = new JSONObject(datas);
        String targetPhrase = inputJsonObject.getString("target");
        String contextText = inputJsonObject.getString("data");

        String modelPath = DependencyParser.DEFAULT_MODEL;

        MaxentTagger tagger = new MaxentTagger(GlobalVarsStore.taggerPath);
        DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(contextText));
        ArrayList<TypedDependency> td = new ArrayList<TypedDependency>();
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            GrammaticalStructure gs = parser.predict(tagged);
            td.addAll(gs.typedDependencies());
        }

        BaselineAnalysisTools bat = new BaselineAnalysisTools();
        bat.prepareTools();
        String[] bag = clear(contextText).split("(?:(?:[^a-zA-Z]+')|(?:'[^a-zA-Z]+))|(?:[^a-zA-Z']+)");
        double sent = 0.0;
        ArrayList<Double> weights = new ArrayList<Double>();
        int pos = 0;
        int neg = 0;
        int neu = 0;
        double confidenceContainment = 0.0;
        double confidenceClassification = 0.0;
        for (int i = 0; i < bag.length; i++) {
            Double res = GlobalVarsStore.lex.testWord(bag[i]);
            if (res != null) {
                weights.add(res);
                confidenceContainment++;
            }
        }
        Double totalSent = 0.0;
        for (int i = 0; i < weights.size(); i++) {
            totalSent += weights.get(i);
            if (weights.get(i) > 0) {
                pos++;
            } else if (weights.get(i) < 0) {
                neg++;
            } else {
                neu++;
            }
        }
        if (weights.size() > 0) {
            sent = totalSent / weights.size();
            confidenceContainment = confidenceContainment / bag.length;
            if (sent > 0) {
                confidenceClassification = pos / (double) (pos + neg + neu);
            } else if (sent < 0) {
                confidenceClassification = neg / (double) (pos + neg + neu);
            } else {
                confidenceClassification = neu / (double) (pos + neg + neu);
            }
        }

        String[] targets;
        if (targetPhrase.contains(" ")) {
            targets = targetPhrase.split(" ");
        } else {
            targets = new String[1];
            targets[0] = targetPhrase;
        }
        double tempSent = 0;
        double tSent = 0;
        double tSize = 0;
        double sentMod = 1;
        for (int i = 0; i < targets.length; i++) {
            for (int j = 0; j < td.size(); j++) {
                String secondLevel = null;
                double secondSentMod = 1;
                if (targets[i].equals(td.get(j).gov().value())) {
                    if (td.get(j).reln().getShortName().equals("neg")) {
                        sentMod = -1;
                    } else {
                        if (td.get(j).reln().getShortName().equals("dobj")) {
                            secondLevel = td.get(j).dep().value();
                        }
                        tSize++;
                        tempSent = GlobalVarsStore.lex.testWord(td.get(j).dep().value());
                    }
                } else if (targets[i].equals(td.get(j).dep().value())) {
                    if (td.get(j).reln().getShortName().equals("neg")) {
                        sentMod = -1;
                    } else {
                        if (td.get(j).reln().getShortName().equals("dobj")) {
                            secondLevel = td.get(j).gov().value();
                        }
                        tSize++;
                        tempSent = GlobalVarsStore.lex.testWord(td.get(j).gov().value());
                    }
                }
                if (secondLevel != null) {
                    for (int k = 0; k < td.size(); k++) {
                        if (!targets[i].equals(td.get(k).dep().value())
                                && secondLevel.equals(td.get(k).gov().value())) {
                            if (td.get(k).reln().getShortName().equals("neg")) {
                                secondSentMod = -1;
                            }
                        } else if (!targets[i].equals(td.get(k).gov().value())
                                && secondLevel.equals(td.get(k).dep().value())) {
                            if (td.get(k).reln().getShortName().equals("neg")) {
                                secondSentMod = -1;
                            }
                        }
                    }
                }
                tSent += tempSent * secondSentMod;
            }
        }
        if (tSize > 0) {
            tSent /= tSize;
        }
        if (tSent == 0) {
            tSent = sent * sentMod;
        }

        objOuter.put("SOS", tSent);
        objOuter.put("CONFIDENCE",
                (confidenceClassification * (1 - GlobalVarsStore.containmentConfidenceWeight))
                        + (confidenceContainment * GlobalVarsStore.containmentConfidenceWeight));
    } catch (JSONException ex) {
        Logger.getLogger(GetBatchSentimentResource.class.getName()).log(Level.SEVERE, null, ex);
    }
    return objOuter.toString();
}

From source file: RestServices.Scoresservice.java

public static void main(String[] args) throws ParseException {
    /* String scenario="transportation";
     GlobalVarsStore.lexicon="wordnet";
     int objective=6;
             
     //Reading source file
     CrawlersConnector ccn=new CrawlersConnector();
     ArrayList<CustomStatus> scenarioTweets = null;
     try {
    scenarioTweets = ccn.readScenario(scenario);
     } catch (IOException ex) {
    System.out.println("<xml><result>Error: "+ex.getMessage()+"</result></xml>");
     } catch (JSONException ex) {
    System.out.println("<xml><result>Error: "+ex.getMessage()+"</result></xml>");
     }
     System.out.println("Parsed " + scenarioTweets.size() + " documents.");
     ArrayList<String> objectiveNames=new ArrayList<String>();
     objectiveNames.add("Change in Level of Service");
     objectiveNames.add("% change of Accidents cost");
     objectiveNames.add("% change of Air pollution (external) cost");
     objectiveNames.add("% change of Noise (external) cost");
     objectiveNames.add("User convenience in using the RP system");
     objectiveNames.add("Availability of alternative routes and modes");
            
     //Calculating SOF
     DecimalFormat df = new DecimalFormat("#.####");
     ArrayList<String> keys=null;
     if(scenario.equalsIgnoreCase("transportation")){
    if(objective==1){keys=ccn.transportKeywords1;}
    else if(objective==2){keys=ccn.transportKeywords2;}
    else if(objective==3){keys=ccn.transportKeywords3;}
    else if(objective==4){keys=ccn.transportKeywords4;}
    else if(objective==5){keys=ccn.transportKeywords5;}
    else if(objective==6){keys=ccn.transportKeywords6;}
     }else if(scenario.equalsIgnoreCase("biofuel")){
    if(objective==1){keys=ccn.biofuelKeywords1;}
    else if(objective==2){keys=ccn.biofuelKeywords2;}
    else if(objective==3){keys=ccn.biofuelKeywords3;}
    else if(objective==4){keys=ccn.biofuelKeywords4;}
     }
     System.out.println("Calculating Score Of Frequency...");
     TopicAnalysisTools tat = new TopicAnalysisTools();
     ArrayList<Double> sofs = new ArrayList<Double>();
     for (int i = 0; i < keys.size(); i++) {
    sofs.add(tat.countFrequency(scenarioTweets, keys.get(i)));
     }
     Double sof=0.0;
     for (int i = 0; i < sofs.size(); i++) {
    sof+=sofs.get(i);
     }
     if(sofs.size()>0) sof=sof/sofs.size();
     System.out.println("Score of Frequency for objective '" + objectiveNames.get(objective-1) + "' is " + df.format(sof));
            
     //Calculating SOS
     System.out.println("Calculating Score Of Sentiment...");
     BaselineAnalysisTools bat = new BaselineAnalysisTools();
     bat.prepareTools();
     ArrayList<CustomStatus> tweets=null;
     Double sos=0.0;
     ArrayList<Double> soses = new ArrayList<Double>();
     for (int i = 0; i < keys.size(); i++) {
    try {
        tweets=ccn.readKeyword(keys.get(i));
        soses.add(bat.SentiWordNetMeanAnalysis(tweets,keys.get(i)));
    } catch (IOException ex) { ex.printStackTrace(); soses.add(0.0);
    } catch (JSONException ex) { ex.printStackTrace();soses.add(0.0);
    }
     }
     for (int i = 0; i < soses.size(); i++) {
    sos+=soses.get(i);
     }
     if(soses.size()>0) sos=sos/soses.size();
     System.out.println("Score of Sentiment for objective '" + objectiveNames.get(objective-1) + "' is " + df.format(sos));*/

    //String tweetDate="Thu Jul 23 00:00:00 CEST 2015";
    //DateFormat df = new SimpleDateFormat("EEE MMM dd kk:mm:ss z yyyy", Locale.ENGLISH);
    //Date result =  df.parse(tweetDate);
    //tweetDate = (result.getYear()+1900)+"-"+(result.getMonth()+1)+"-"+result.getDate();
    //System.out.println(tweetDate);

    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "C:\\Users\\ViP\\Copy\\NTUA\\Code\\ConsensusPublicOpinion\\models\\english-left3words-distsim.tagger";

    for (int argIndex = 0; argIndex < args.length;) {
        switch (args[argIndex]) {
        case "-tagger":
            taggerPath = args[argIndex + 1];
            argIndex += 2;
            break;
        case "-model":
            modelPath = args[argIndex + 1];
            argIndex += 2;
            break;
        default:
            throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }

    String text = "I love apples and do not hate oranges";

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);

        // Print typed dependencies
        System.out.println(gs);
        ArrayList<TypedDependency> cd = (ArrayList<TypedDependency>) gs.typedDependencies();
        for (int i = 0; i < cd.size(); i++) {
            System.out.println(String.format("%1$" + 10 + "s", cd.get(i).gov().value()) + "\t"
                    + String.format("%1$" + 10 + "s", cd.get(i).dep().value()) + "\t"
                    + cd.get(i).reln().getShortName() + "\t" + cd.get(i).reln().getLongName());
        }
    }
}