NMTFUtils.GetFWords.java Source code

Java tutorial

Introduction

Here is the source code for NMTFUtils.GetFWords.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package NMTFUtils;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;

/**
 *
 * @author zwj
 */
public class GetFWords {

    static String dicpath = "E:\\cls-acl10-processed_cutshortdoc\\mydata_add_withtraintest\\en_de_books_dvd\\dict.dat";

    public static void main(String[] args) throws IOException {
        String base = "EBGD";
        getTopWordsLSFTL(base);
        //        getTopWordsTriTL(base);
        //        getTopWordsDTL(base);
    }

    public static void getTopWordsLSFTL(String base) throws IOException {
        int TOP_NUM = 20;
        List<String> dictsLists = FileUtils.readLines(new File(dicpath));
        List<String> indexLists = FileUtils.readLines(new File("G:\\syn_github\\KLSNMF\\KLSNMF\\indext.csv"));
        Map<String, String> wordMaps = new HashMap<>();

        for (String line : dictsLists) {
            String[] split = line.split("@:@");
            if (split.length != 2) {
                wordMaps.put(split[0], "");
            } else {
                wordMaps.put(split[0], split[1]);
            }
        }
        PrintStream ps = new PrintStream(
                "G:\\\\\\journal\\" + base + "\\LSFTLs-TopWordt.txt");
        for (int li = 0; li < indexLists.size(); li++) {
            String[] split = indexLists.get(li).split(",");
            StringBuilder topWords = new StringBuilder();
            for (int i = 0; i < TOP_NUM; i++) {
                String get = wordMaps.get(split[i]);
                if (get != null) {
                    topWords.append(get.trim()).append(" ");
                } else {
                    System.out.println(split[i]);
                    return;
                }
            }
            System.out.println(topWords);
            ps.println("TOP:" + (li + 1) + " " + topWords);
        }
        ps.flush();
        ps.close();

    }

    public static void getTopWordsTriTL(String base) throws IOException {
        int TOP_NUM = 20;
        List<String> dictsLists = FileUtils.readLines(new File(dicpath));
        List<String> indexLists = FileUtils.readLines(new File("F:\\matlab_code\\TriTL\\indext.csv"));
        Map<String, String> wordMaps = new HashMap<>();

        for (String line : dictsLists) {
            String[] split = line.split("@:@");
            if (split.length != 2) {
                wordMaps.put(split[0], "");
            } else {
                wordMaps.put(split[0], split[1]);
            }

        }
        PrintStream ps = new PrintStream(
                "G:\\\\\\journal\\" + base + "\\TRITLt-TopWords.txt");
        for (int li = 0; li < indexLists.size(); li++) {
            String[] split = indexLists.get(li).split(",");
            StringBuilder topWords = new StringBuilder();
            for (int i = 0; i < TOP_NUM; i++) {
                String get = wordMaps.get(split[i]);
                if (get != null) {
                    topWords.append(get.trim()).append(" ");
                } else {
                    System.out.println(split[i]);
                    return;
                }
            }
            System.out.println(topWords);
            ps.println("TOP:" + (li + 1) + " " + topWords);
        }
        ps.flush();
        ps.close();

    }

    public static void getTopWordsDTL(String base) throws IOException {
        int TOP_NUM = 20;
        List<String> dictsLists = FileUtils.readLines(new File(dicpath));
        List<String> indexLists = FileUtils.readLines(new File("F:\\matlab_code\\DTL\\indexs.csv"));
        Map<String, String> wordMaps = new HashMap<>();

        for (String line : dictsLists) {
            String[] split = line.split("@:@");
            if (split.length != 2) {
                wordMaps.put(split[0], "");
            } else {
                wordMaps.put(split[0], split[1]);
            }

        }
        PrintStream ps = new PrintStream(
                "G:\\\\\\journal\\" + base + "\\DTLs-TopWords.txt");
        for (int li = 0; li < indexLists.size(); li++) {
            String[] split = indexLists.get(li).split(",");
            StringBuilder topWords = new StringBuilder();
            for (int i = 0; i < TOP_NUM; i++) {
                String get = wordMaps.get(split[i]);
                if (get != null) {
                    topWords.append(get.trim()).append(" ");
                } else {
                    System.out.println(split[i]);
                    return;
                }
            }
            System.out.println(topWords);
            ps.println("TOP:" + (li + 1) + " " + topWords);
        }
        ps.flush();
        ps.close();

    }
}