setexpansion.util.MapUtility.java Source code

Java tutorial

Introduction

Here is the source code for setexpansion.util.MapUtility.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package setexpansion.util;

/**
 * This is the Utility class and contain methods used by static and dynamic seisa. 
 * @author nawshad and sanket
 */
import com.google.common.collect.Sets;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import java.util.Set;
//import sun.applet.Main;

public class MapUtility {
    public static boolean ASC = true;
    public static boolean DESC = false;
    public static Map<String, Integer> termCountMap;
    public static Map<String, String> termFileNameMap;
    public static Map<String, Integer> termPairCountMap;

    {
        termCountMap = new HashMap();
        File file = new File("smalltermCount.txt");
        try {
            LineIterator iterator = FileUtils.lineIterator(file, "UTF-8");
            while (iterator.hasNext()) {
                String line = iterator.nextLine();
                String value[] = line.split("\t");
                termCountMap.put(value[0], Integer.parseInt(value[1]));
            }
            System.out.println("Finished loading the term count");
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        termPairCountMap = new HashMap();
        file = new File("smallTermPairCount.txt");
        try {
            LineIterator iterator = FileUtils.lineIterator(file, "UTF-8");
            while (iterator.hasNext()) {
                String line = iterator.nextLine();
                String value[] = line.split("\t\t");
                termPairCountMap.put(value[0], Integer.parseInt(value[1]));
            }
            System.out.println("Finished loading the term pair count");
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        //      
        //      termFileNameMap = new HashMap();
        //      termFileNameMap.put("9   9/ Ret      WBPX   [ edit ]   West Virginia", "index_file1_aa");
        //      termFileNameMap.put("WBPX   [ edit ]   Washington      Rio de Janeiro   Sioux City", "index_file1_ab");
        //      termFileNameMap.put("Rio de Janeiro   Sinwu      N of Argonia   S of Beresford", "index_file1_ac");
        //      termFileNameMap.put("N of Argonia   S of Bath      Lester Towns   Ricky Andrews", "index_file1_ad");
        //      termFileNameMap.put("Lester Towns   Rick Sortun      Lester Towns   Ricky Andrews", "index_file1_ae");
        //      termFileNameMap.put("JohnYoung   Leftwich      Henry James   T. Snowball", "index_file1_af");
        //      termFileNameMap.put("Henry James   T. S. Eliot      Friedrich Beckh   Max J. Wright", "index_file1_ag");
        //      termFileNameMap.put("Friedrich Beckh   Max Ibel      Dundee   Moose Lake", "index_file1_ah");
        //      termFileNameMap.put("Dundee   Moorreesburg      Crisp of Bungay   Holden of The Firs", "index_file1_ai");
        //      termFileNameMap.put("Crisp of Bungay   Holcroft of Eaton Mascott      Cashel   Gloucestershire Western", "index_file1_aj");
        //      termFileNameMap.put("Cashel   Gloucestershire Eastern      Blind Jumbo   Smiffy", "index_file1_ak");
        //      termFileNameMap.put("Blind Jumbo   Smart Alice      B. Beinke   D. Morgan", "index_file1_al");
        //      termFileNameMap.put("B. Beinke   D. Brown      Akkimeobsi Juryeonda   Dae Jang-hwa Hong-ryeonjeon", "index_file1_am");
        //      termFileNameMap.put("Akkimeobsi Juryeonda   Chunmong      -1250 bil.    -848.3 bil. $A", "index_file1_an");
        //      
    }

    public static Map<String, Double> sortByComparator(Map<String, Double> unsortMap, final boolean order) {

        List<Entry<String, Double>> list = new LinkedList<Entry<String, Double>>(unsortMap.entrySet());

        // Sorting the list based on values
        Collections.sort(list, new Comparator<Entry<String, Double>>() {
            public int compare(Entry<String, Double> o1, Entry<String, Double> o2) {
                if (order) {
                    return o1.getValue().compareTo(o2.getValue());
                } else {
                    return o2.getValue().compareTo(o1.getValue());

                }
            }
        });

        // Maintaining insertion order with the help of LinkedList
        Map<String, Double> sortedMap = new LinkedHashMap<String, Double>();
        for (Entry<String, Double> entry : list) {
            sortedMap.put(entry.getKey(), entry.getValue());
        }

        return sortedMap;
    }

    public static void printMap(Map<String, Double> map) {
        for (Entry<String, Double> entry : map.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }
    }

    public double pairWiseMatrixLookUp(List<String> dictionary, double[][] pairWiseMatrix, String firstTerm,
            String secondTerm) {
        int row = dictionary.indexOf(firstTerm);
        int col = dictionary.indexOf(secondTerm);

        return pairWiseMatrix[row][col];
    }

    public Map<String, Double> findRelScore(Set<String> dictionary, Set<String> seeds) {
        Map<String, Double> relScoreMap = new LinkedHashMap<String, Double>();
        long startTime = System.currentTimeMillis();
        for (String dict : dictionary) {
            double relScore = 0.0;
            // System.out.println("pairWiseMatrix Entity:"+entry.getKey());
            for (String seed : seeds) {
                // System.out.println("Seed Entity:"+s);
                //relScore += this.pairWiseMatrixLookUp(dictionary, pairWiseMatrix, dict, seed);
                relScore += this.getSimilarityScore(dict, seed);
            }

            double finalRelScore = relScore / seeds.size();
            //System.out.println("For dict:" + dict + " relscore:" + finalRelScore);
            relScoreMap.put(dict, finalRelScore);
        }
        long endTime = System.currentTimeMillis();
        //System.out.println("Time to calculate the relavance" + (endTime - startTime));
        return relScoreMap;
    }

    public Map<String, Double> findTopK(Map<String, Double> relScoreMap, double thresHold) {
        Map<String, Double> topKElements = new LinkedHashMap<String, Double>();

        for (Map.Entry<String, Double> entry : relScoreMap.entrySet()) {
            if (entry.getValue() >= thresHold) {
                topKElements.put(entry.getKey(), entry.getValue());
            }
        }

        return topKElements;
    }

    public List<String> keyValuesFromMap(Map<String, Double> relScoreMap) {
        List<String> keys = new ArrayList<String>();
        for (Map.Entry<String, Double> entry : relScoreMap.entrySet()) {
            keys.add(entry.getKey());
        }
        return keys;
    }

    public Map<String, Double> findTopKSet(Map<String, Double> relScoreMap, int topKSetSize) {
        Map<String, Double> topKElements = new LinkedHashMap<String, Double>();
        int i = 0;
        for (Map.Entry<String, Double> entry : relScoreMap.entrySet()) {
            if (i < topKSetSize) {
                topKElements.put(entry.getKey(), entry.getValue());
            }
            i++;
        }
        return topKElements;
    }

    public static Set getDifferenceInIter(Set<String> listI, Set<String> listJ) {
        return Sets.difference(listI, listJ);
    }

    public static boolean isSetDifferent(Set<String> listI, Set<String> listJ) {
        if (Sets.difference(listI, listJ).size() == 0) {
            if (Sets.difference(listJ, listI).size() == 0) {
                return false;
            }
        }
        return true;
    }

    public Map<String, Double> findGScore(Map<String, Double> relScoreMap, Map<String, Double> simScores,
            double alpha) {
        Map<String, Double> gScoreMap = new LinkedHashMap<String, Double>();
        for (Map.Entry<String, Double> entry : relScoreMap.entrySet()) {
            double score = alpha * entry.getValue() + (1 - alpha) * simScores.get(entry.getKey());
            gScoreMap.put(entry.getKey(), score);
        }
        return gScoreMap;
    }

    public List<Double> getValuesFromMap(Map<String, Double> relScoreMap) {
        List<Double> keys = new ArrayList<Double>();
        for (Map.Entry<String, Double> entry : relScoreMap.entrySet()) {
            keys.add(entry.getValue());
        }
        return keys;
    }

    public int OtsusThreshold(List<Double> data) {
        // Otsu's threshold algorithm
        // C++ code by Jordan Bevik <Jordan.Bevic@qtiworld.com>
        // ported to ImageJ plugin by G.Landini
        int k;
        int kStar; // k = the current threshold; kStar = optimal threshold
        double N1, N; // N1 = # points with intensity <=k; N = total number of
                      // points
        double BCV, BCVmax; // The current Between Class Variance and maximum
        // BCV
        double num, denom; // temporary bookeeping
        double Sk; // The total intensity for all histogram points <=k
        double S, L = data.size(); // The total intensity of the image

        // Initialize values:
        S = N = 0;
        for (k = 0; k < L; k++) {
            S += k * data.get(k); // Total histogram intensity
            N += data.get(k); // Total number of data points
        }

        Sk = 0;
        N1 = data.get(0); // The entry for zero intensity
        BCV = 0;
        BCVmax = 0;
        kStar = 0;

        // Look at each possible threshold value,
        // calculate the between-class variance, and decide if it's a max
        for (k = 1; k < L - 1; k++) { // No need to check endpoints k = 0 or k =
            // L-1
            Sk += k * data.get(k);
            N1 += data.get(k);

            // The float casting here is to avoid compiler warning about loss of
            // precision and
            // will prevent overflow in the case of large saturated images
            denom = (double) (N1) * (N - N1); // Maximum value of denom is
            // (N^2)/4 = approx. 3E10

            if (denom != 0) {
                // Float here is to avoid loss of precision when dividing
                num = ((double) N1 / N) * S - Sk; // Maximum value of num =
                // 255*N = approx 8E7
                BCV = (num * num) / denom;
            } else
                BCV = 0;

            if (BCV >= BCVmax) { // Assign the best threshold found so far
                BCVmax = BCV;
                kStar = k;
            }
        }
        // kStar += 1; // Use QTI convention that intensity -> 1 if intensity >=
        // k
        // (the algorithm was developed for I-> 1 if I <= k.)
        return kStar;
    }

    static int MaxEntropy(List<Double> data) {
        // Implements Kapur-Sahoo-Wong (Maximum Entropy) thresholding method
        // Kapur J.N., Sahoo P.K., and Wong A.K.C. (1985) "A New Method for
        // Gray-Level Picture Thresholding Using the Entropy of the Histogram"
        // Graphical Models and Image Processing, 29(3): 273-285
        // M. Emre Celebi
        // 06.15.2007
        // Ported to ImageJ plugin by G.Landini from E Celebi's fourier_0.8 routines
        int threshold = -1;
        int ih, it;
        int first_bin;
        int last_bin;
        double tot_ent; /* total entropy */
        double max_ent; /* max entropy */
        double ent_back; /* entropy of the background pixels at a given threshold */
        double ent_obj; /* entropy of the object pixels at a given threshold */
        double[] norm_histo = new double[data.size()]; /* normalized histogram */
        double[] P1 = new double[data.size()]; /* cumulative normalized histogram */
        double[] P2 = new double[data.size()];

        double total = 0;
        for (ih = 0; ih < data.size(); ih++)
            total += data.get(ih);

        for (ih = 0; ih < data.size(); ih++)
            norm_histo[ih] = data.get(ih) / total;

        P1[0] = norm_histo[0];
        P2[0] = 1.0 - P1[0];
        for (ih = 1; ih < data.size(); ih++) {
            P1[ih] = P1[ih - 1] + norm_histo[ih];
            P2[ih] = 1.0 - P1[ih];
        }

        /* Determine the first non-zero bin */
        first_bin = 0;
        for (ih = 0; ih < data.size(); ih++) {
            if (!(Math.abs(P1[ih]) < 2.220446049250313E-16)) {
                first_bin = ih;
                break;
            }
        }

        /* Determine the last non-zero bin */
        last_bin = data.size() - 1;
        for (ih = data.size() - 1; ih >= first_bin; ih--) {
            if (!(Math.abs(P2[ih]) < 2.220446049250313E-16)) {
                last_bin = ih;
                break;
            }
        }

        // Calculate the total entropy each gray-level
        // and find the threshold that maximizes it 
        max_ent = Double.MIN_VALUE;

        for (it = first_bin; it <= last_bin; it++) {
            /* Entropy of the background pixels */
            ent_back = 0.0;
            for (ih = 0; ih <= it; ih++) {
                if (data.get(ih) != 0) {
                    ent_back -= (norm_histo[ih] / P1[it]) * Math.log(norm_histo[ih] / P1[it]);
                }
            }

            /* Entropy of the object pixels */
            ent_obj = 0.0;
            for (ih = it + 1; ih < data.size(); ih++) {
                if (data.get(ih) != 0) {
                    ent_obj -= (norm_histo[ih] / P2[it]) * Math.log(norm_histo[ih] / P2[it]);
                }
            }

            /* Total entropy */
            tot_ent = ent_back + ent_obj;

            // IJ.log(""+max_ent+"  "+tot_ent);
            if (max_ent < tot_ent) {
                max_ent = tot_ent;
                threshold = it;
            }
        }
        return threshold;
    }

    public static void main(String[] args) {
        List<Double> arr = new ArrayList();
        //      arr.add(0.75);
        //      arr.add(0.75);
        //      arr.add(0.7);
        //      arr.add(0.7);
        //      arr.add(0.65);
        //      arr.add(0.6);
        arr.add(0.333);
        arr.add(0.333);
        arr.add(0.333);
        arr.add(0.333);
        arr.add(0.25);
        arr.add(0.1);
        System.out.println(MaxEntropy(arr));
    }

    public static Set getSimilarity(Set<String> listI, Set<String> listJ) {
        return Sets.difference(listI, listJ);
    }

    public static String getFileAddress(String term1, String term2) {

        String fileIndex = "";
        for (Map.Entry<String, String> entry : termFileNameMap.entrySet()) {
            String key = entry.getKey().toString();
            String[] splits = key.split("\t\t");
            String termToCompare = term1 + "\t" + term2;
            String revTermToCompare = term2 + "\t" + term1;
            if (splits[0].compareTo(termToCompare) >= 0 && splits[1].compareTo(termToCompare) <= 0) {
                fileIndex = entry.getValue().toString();
                return fileIndex;
            }
            if (splits[0].compareTo(revTermToCompare) >= 0 && splits[1].compareTo(revTermToCompare) <= 0) {
                fileIndex = entry.getValue().toString();
                return fileIndex;
            }
        }
        return fileIndex;
    }

    public static double getSimilarityScore(String term1, String term2) {

        Integer listCount1 = termCountMap.get(term1);
        Integer listCount2 = termCountMap.get(term2);
        String termToSearch = term1 + "\t" + term2;
        Integer intersection = null;
        if (termPairCountMap.containsKey(termToSearch)) {
            intersection = termPairCountMap.get(termToSearch);
        } else {
            termToSearch = term2 + "\t" + term1;
            intersection = termPairCountMap.get(termToSearch);
        }
        if (intersection == null) {
            return 0;
        }
        //System.out.println("pairt term found..");
        double union = listCount1 + listCount2 - intersection;
        return intersection / union;
        //String indexDirName = getFileAddress(term1,term2);

        //       SearchEngine se;
        //      try {
        //         se = new SearchEngine("index_small_term_pair");
        //      TopDocs topDocs = se.performSearch(term1+term2, 1);
        //      ScoreDoc[] hits = topDocs.scoreDocs;
        //      if(hits.length ==0){
        //         topDocs = se.performSearch(term2+term1, 1);
        //         hits = topDocs.scoreDocs;
        //      }
        //      for (int i = 0; i < hits.length; i++) {
        //         Document doc = se.getDocument(hits[i].doc);
        //         //System.out.println(doc.get("term") + " " + doc.get("count"));
        //         int intersection = Integer.parseInt(doc.get("count"));
        //         double union = listCount1 +listCount2 - intersection;
        //         return intersection/union;
        //      }
        //      try{
        //       LineIterator iterator = FileUtils.lineIterator(new File("smallTermPairCount.txt"), "UTF-8");
        //       while (iterator.hasNext()) {
        //              String line = iterator.nextLine();
        //              String termVal[] = line.split("\t\t");
        //              String termPair[] = termVal[0].split("\t");
        //              if((termPair[0].equals(term1) && termPair[1].equals(term2)) ||(termPair[1].equals(term1) && termPair[0].equals(term2))){
        //                int intersection = Integer.parseInt(termVal[1]);
        //                double union = listCount1 +listCount2 - intersection;
        //                return intersection/union;
        //             
        //             }
        //          }

        //      System.out.println("performSearch done");
        //       if((termPair[0].equals(term1) && termPair[1].equals(term2)) ||(termPair[1].equals(term1) && termPair[0].equals(term2))){
        //         int interesection = Integer.parseInt(value[1]);
        //         double sim = listCount1 +listCount2 - interesection;
        //         return sim;
        //      
        //      } catch (IOException e) {
        //         // TODO Auto-generated catch block
        //         e.printStackTrace();}
        //       
        //return 0;
    }

    public static void readFile(String fileName) throws IOException {
        File file = new File(fileName);
        LineIterator iterator = FileUtils.lineIterator(file, "UTF-8");
        long startTime = System.currentTimeMillis();
        System.out.println("Starting..");
        while (iterator.hasNext()) {
            iterator.next();
        }
        long endTime = System.currentTimeMillis();
        System.out.println(endTime - startTime);
    }

}