context.core.task.pos.POSTagger.java Source code

Java tutorial

Introduction

Here is the source code for context.core.task.pos.POSTagger.java

Source

/*
     
 * Copyright (c) 2015 University of Illinois Board of Trustees, All rights reserved.   
 * Developed at GSLIS/ the iSchool, by Dr. Jana Diesner, Amirhossein Aleyasen,    
 * Chieh-Li Chin, Shubhanshu Mishra, Kiumars Soltani, and Liang Tao.     
 *   
 * This program is free software; you can redistribute it and/or modify it under   
 * the terms of the GNU General Public License as published by the Free Software   
 * Foundation; either version 2 of the License, or any later version.   
 *    
 * This program is distributed in the hope that it will be useful, but WITHOUT   
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or    
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for   
 * more details.   
 *    
 * You should have received a copy of the GNU General Public License along with   
 * this program; if not, see <http://www.gnu.org/licenses>.   
 *
     
     
 */
package context.core.task.pos;

import context.app.AppConfig;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Static helper for part-of-speech tagging with Stanford {@link MaxentTagger}
 * models.
 *
 * <p>Each supported language code is mapped to a model file located under
 * {@code AppConfig.getUserDirLoc() + "/data/pos/models/"}. A tagger is created
 * the first time its language is requested and is then reused for later calls.
 * The cache is a plain {@link HashMap} and access to it is not synchronized.
 *
 * @author Aale
 */
public class POSTagger {

    /** Language code -> path of the tagger model file for that language. */
    private static final Map<String, String> MODEL_PATHS = new HashMap<>();

    /** Language code -> tagger instance already loaded for that language. */
    private static final Map<String, MaxentTagger> TAGGERS = new HashMap<>();

    static {
        // Resolve the model directory once; every model file lives beneath it.
        final String modelDir = AppConfig.getUserDirLoc() + "/data/pos/models/";

        MODEL_PATHS.put("en", modelDir + "wsj-0-18-left3words-distsim.tagger");
        MODEL_PATHS.put("ar", modelDir + "arabic-fast.tagger");
        MODEL_PATHS.put("ch", modelDir + "chinese-distsim.tagger");
        MODEL_PATHS.put("fr", modelDir + "french.tagger");
        MODEL_PATHS.put("de", modelDir + "german-fast.tagger");
    }

    /**
     * Small manual smoke test: loads the {@code "en"} tagger, tags a
     * whitespace-only sample string and prints the result to standard output.
     *
     * @param args command-line arguments; not used
     * @throws ClassNotFoundException declared on the signature; not thrown by
     *         the code in this method itself
     * @throws IOException declared on the signature; not thrown by the code in
     *         this method itself
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException {
        MaxentTagger tagger = getTagger("en");

        // The sample string
        String sample = "    ";
        // The tagged string
        String tagged = tagger.tagString(sample);
        // Output the result
        System.out.println(tagged);
    }

    /**
     * Tags an already tokenized sentence with the tagger registered for the
     * given language.
     *
     * @param sent the tokens of one sentence, passed unchanged to
     *        {@link MaxentTagger#tagSentence}
     * @param language one of the registered language codes
     *        ({@code "en"}, {@code "ar"}, {@code "ch"}, {@code "fr"}, {@code "de"})
     * @return the list returned by {@link MaxentTagger#tagSentence} for {@code sent}
     * @throws IllegalArgumentException if no model is registered for
     *         {@code language} (including {@code null})
     */
    public static List<TaggedWord> tag(List<CoreLabel> sent, String language) {
        return getTagger(language).tagSentence(sent);
    }

    /**
     * Returns the tagger for {@code language}, creating and caching it on the
     * first request.
     *
     * @param language a registered language code
     * @return the cached or newly created tagger
     * @throws IllegalArgumentException if no model is registered for
     *         {@code language}
     */
    private static MaxentTagger getTagger(String language) {
        MaxentTagger tagger = TAGGERS.get(language);
        if (tagger == null) {
            String modelPath = MODEL_PATHS.get(language);
            if (modelPath == null) {
                // Fail here with a clear message instead of handing a null
                // model path to the MaxentTagger constructor.
                throw new IllegalArgumentException(
                        "Unsupported POS tagger language: " + language
                        + " (supported: " + MODEL_PATHS.keySet() + ")");
            }
            tagger = new MaxentTagger(modelPath);
            TAGGERS.put(language, tagger);
        }
        return tagger;
    }
}