cooccurrence.emf.java Source code

Java tutorial

Introduction

Here is the source code for cooccurrence.emf.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package cooccurrence;

import static cooccurrence.pmi.listFilesForFolder;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;

/**
 * Reads tab-separated (from, to, weight) triples from every file found under
 * an input folder and copies each file's weights into a dense Apache Commons
 * Math {@link RealMatrix}.
 *
 * @author vishrawa
 */
public class emf {

    /** Logger used to report problems encountered while processing input files. */
    private static final Logger LOGGER = Logger.getLogger(emf.class.getName());

    /**
     * Builds one matrix per input file.
     *
     * @param args optional; {@code args[0]}, when present, is the folder whose
     *             files are processed. Without arguments the empty path is
     *             used.
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "";
        ArrayList<String> files = new ArrayList<>();
        listFilesForFolder(new File(path), files);
        for (String filePath : files) {
            System.out.println(filePath);

            // nested map holding the (PPMI) weights of this file: from -> (to -> weight)
            HashMap<String, HashMap<String, Double>> cooccur = new HashMap<>();
            readFileContents(filePath, cooccur);
            if (cooccur.isEmpty()) {
                // A matrix needs at least one row and one column, so a file
                // that was unreadable or held no valid triple is skipped.
                LOGGER.log(Level.WARNING, "No co-occurrence triples read from {0}; skipping", filePath);
                continue;
            }

            // Matrix cells are addressed by integer ids, so these lists map an
            // id back to its string. The matrix is taken to be symmetric, hence
            // both lists are built from the same key set.
            ArrayList<String> rowStrings = new ArrayList<>(cooccur.keySet());
            ArrayList<String> colStrings = new ArrayList<>(cooccur.keySet());

            // zero-initialised matrix of the required dimensions
            RealMatrix matrixR = MatrixUtils.createRealMatrix(rowStrings.size(), colStrings.size());
            populateMatrixR(matrixR, cooccur, rowStrings, colStrings);
        }
    }

    /**
     * Reads a file line by line and adds every well-formed triple to
     * {@code cooccur}. A line is used only when it splits into exactly three
     * tab-separated tokens; all other lines are ignored. I/O failures are
     * logged and end the read early; whatever was parsed so far is kept.
     *
     * @param filePath path of the file to read
     * @param cooccur  map that receives the parsed weights (from -> to -> weight)
     * @return the sum of all weights read, truncated to a {@code long}
     * @throws NumberFormatException if the third token of a line is not a
     *                               parsable number
     */
    private static long readFileContents(String filePath, HashMap<String, HashMap<String, Double>> cooccur) {
        // Summed as a double and truncated once at the end, so fractional
        // weights are not discarded one line at a time.
        double total = 0.0;
        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer tok = new StringTokenizer(line, "\t");
                if (tok.countTokens() != 3) {
                    continue;
                }
                String from = tok.nextToken();
                String to = tok.nextToken();
                double count = Double.parseDouble(tok.nextToken());
                total += count;
                addToMatrix(from, to, count, cooccur);
            }
        } catch (IOException ex) {
            // also covers FileNotFoundException, which is an IOException
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return (long) total;
    }

    /**
     * Adds a (from, to, weight) triple to the co-occurrence map. When the
     * pair is already present the new weight is added to the stored one.
     *
     * @param from   key of the outer map
     * @param to     key of the inner map
     * @param count  weight to store or accumulate
     * @param matrix co-occurrence map to update
     */
    private static void addToMatrix(String from, String to, Double count,
            HashMap<String, HashMap<String, Double>> matrix) {
        HashMap<String, Double> inner = matrix.get(from);
        if (inner == null) {
            inner = new HashMap<>();
            matrix.put(from, inner);
        }
        Double previous = inner.get(to);
        if (previous == null) {
            inner.put(to, count);
        } else {
            inner.put(to, previous + count);
        }
    }

    /**
     * Copies the weights of {@code cooccur} into {@code matrixR}.
     * <p>
     * The value stored under (row, col) in the map is written to matrix cell
     * (index of col in {@code colStrings}, index of row in {@code rowStrings}),
     * i.e. each outer key fills one matrix column. Every outer entry is
     * removed from {@code cooccur} once it has been copied, so the map is
     * empty when the method returns normally (presumably to release memory
     * while the dense matrix is filled).
     *
     * @param matrixR    matrix to fill
     * @param cooccur    source weights; emptied by this method
     * @param rowStrings labels whose list position is the index used for outer keys
     * @param colStrings labels whose list position is the index used for inner keys
     * @throws IllegalArgumentException if a key of {@code cooccur} is missing
     *                                  from the corresponding label list
     */
    private static void populateMatrixR(RealMatrix matrixR, HashMap<String, HashMap<String, Double>> cooccur,
            ArrayList<String> rowStrings, ArrayList<String> colStrings) {
        // One hash lookup per label instead of a linear indexOf scan per cell.
        Map<String, Integer> rowIndex = indexByLabel(rowStrings);
        Map<String, Integer> colIndex = indexByLabel(colStrings);

        Iterator<Map.Entry<String, HashMap<String, Double>>> rows = cooccur.entrySet().iterator();
        while (rows.hasNext()) {
            Map.Entry<String, HashMap<String, Double>> rowEntry = rows.next();
            int i = requireIndex(rowIndex, rowEntry.getKey(), "row");
            for (Map.Entry<String, Double> cell : rowEntry.getValue().entrySet()) {
                int j = requireIndex(colIndex, cell.getKey(), "column");
                matrixR.setEntry(j, i, cell.getValue()); // each column in D represents the vector w-> d_w
            }
            rows.remove();
        }
    }

    /** Maps each label to the position of its first occurrence in {@code labels}. */
    private static Map<String, Integer> indexByLabel(ArrayList<String> labels) {
        Map<String, Integer> index = new HashMap<>();
        for (int position = 0; position < labels.size(); position++) {
            String label = labels.get(position);
            if (!index.containsKey(label)) {
                index.put(label, position);
            }
        }
        return index;
    }

    /** Returns the index stored for {@code label}, or fails with a message naming the label. */
    private static int requireIndex(Map<String, Integer> index, String label, String axis) {
        Integer position = index.get(label);
        if (position == null) {
            throw new IllegalArgumentException("No " + axis + " index for label \"" + label + "\"");
        }
        return position;
    }
}