com.mythesis.profileanalysis.Evaluate.java Source code

Java tutorial

Introduction

Here is the source code for com.mythesis.profileanalysis.Evaluate.java

Source

/* 
 * Copyright 2015 Konstantinos Papangelou.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.mythesis.profileanalysis;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;

/**
 * Applies LDA over a grid of (number of topics, beta) settings and keeps track
 * of the model that achieves the highest log likelihood on a held-out test set.
 *
 * <p>The constructor reads the input file, splits its lines at random into a
 * training set and a test set, and derives one alpha value per topic count.
 * {@link #performLDAevaluation()} then runs one experiment per
 * (topic count, beta) pair.
 *
 * @author Konstantinos Papangelou
 */
public class Evaluate {
    private static final Logger LOGGER = Logger.getLogger(Evaluate.class.getName());

    /** Name of the sub-directory of the output directory that holds all experiments. */
    private static final String EXPERIMENTS_DIR = "experiments";
    /** File name of the training set written into each experiment directory. */
    private static final String TRAIN_FILE = "LDAtrain.txt";
    /** File name of the test set written into each experiment directory. */
    private static final String TEST_FILE = "LDAtest.txt";

    private final String outputDirectory;
    private final List<Integer> nTopics;
    /** alpha.get(i) is the alpha used together with nTopics.get(i). */
    private final List<Double> alpha;
    private final List<Double> beta;
    int niters;
    int top_words;
    /** First element is the number of training pages, followed by the pages themselves. */
    List<String> trainingSet;
    /** First element is the number of test pages, followed by the pages themselves. */
    List<String> testSet;
    /** One "likelihood,nTopics,alpha,beta" line per experiment that has been run. */
    List<String> logLikelihoods;
    int bestNoTopics;
    double bestBeta;

    /**
     * Reads the LDA input file and prepares the training/test split and the
     * parameter grid.
     *
     * @param outputDirectory directory under which the experiments directory is created;
     *                        it is concatenated as-is, so it must already end with a
     *                        path separator
     * @param nTopics         candidate numbers of topics; every value must be positive
     * @param beta            candidate beta values
     * @param niters          iteration count forwarded to every LDA call
     * @param top_words       top-words count forwarded to every LDA call
     * @param LDAdir          path of the input file; its first line is discarded and every
     *                        remaining line is treated as one page
     * @param testSplit       fraction of the pages placed in the test set, in [0, 1]
     * @throws IllegalArgumentException if {@code testSplit} is outside [0, 1] or a topic
     *                                  count is not positive
     */
    public Evaluate(String outputDirectory, List<Integer> nTopics, List<Double> beta, int niters, int top_words,
            String LDAdir, double testSplit) {
        if (testSplit < 0.0 || testSplit > 1.0) {
            throw new IllegalArgumentException("testSplit must be in [0, 1] but was " + testSplit);
        }
        this.outputDirectory = outputDirectory;
        // Defensive copies: alpha is index-aligned with nTopics, so the lists must not
        // change behind our back after construction.
        this.nTopics = new ArrayList<>(nTopics);
        this.beta = new ArrayList<>(beta);
        this.alpha = new ArrayList<>(this.nTopics.size());
        for (Integer nTopic : this.nTopics) {
            if (nTopic <= 0) {
                throw new IllegalArgumentException("number of topics must be positive but was " + nTopic);
            }
            // Floating-point division: 50 / nTopic in int arithmetic would truncate
            // (e.g. 20 topics -> 2.0 instead of 2.5, more than 50 topics -> 0.0).
            this.alpha.add(50.0 / nTopic);
        }
        this.niters = niters;
        this.top_words = top_words;
        logLikelihoods = new ArrayList<>();
        trainingSet = new ArrayList<>();
        testSet = new ArrayList<>();

        List<String> pages = new ArrayList<>();
        try {
            pages = FileUtils.readLines(new File(LDAdir));
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Could not read LDA input file " + LDAdir, ex);
        }

        if (pages.isEmpty()) {
            // Nothing was read (missing, unreadable or empty file); continue with empty sets
            // instead of failing on remove(0).
            LOGGER.log(Level.WARNING, "No pages available from {0}; training and test sets will be empty", LDAdir);
        } else {
            // The first line is not a page (presumably the document-count header of the
            // input format - confirm against the producer of this file). Each set gets its
            // own count line below.
            pages.remove(0);
        }

        // After a uniform shuffle, the first testSetSize pages form a uniformly random
        // test set; the remainder is the training set.
        Collections.shuffle(pages);
        int testSetSize = (int) (pages.size() * testSplit);
        int trainingSetSize = pages.size() - testSetSize;
        testSet.add(String.valueOf(testSetSize));
        trainingSet.add(String.valueOf(trainingSetSize));
        testSet.addAll(pages.subList(0, testSetSize));
        trainingSet.addAll(pages.subList(testSetSize, pages.size()));
    }

    /**
     * Performs LDA once for every (number of topics, beta) combination and finds the best
     * model using log likelihood estimation on the test set.
     *
     * <p>Experiment number {@code n} (counted from 1) uses the directory
     * {@code <outputDirectory>experiments/<n>/}. The training and test sets are written
     * there, {@code LDAcall.call} is invoked first for the training file and then for the
     * test file, and the log likelihood of the test file is obtained from
     * {@code Metrics.getLogLikelihood}. All results are finally written to
     * {@code logLikelihoods.txt} in the experiments directory.
     *
     * @return the directory that contains the parameters of the best model, or an empty
     *         string if the parameter grid is empty
     */
    public String performLDAevaluation() {

        Metrics met = new Metrics();
        // File.separator keeps the paths identical on Windows and valid on other platforms.
        String experimentsRoot = outputDirectory + EXPERIMENTS_DIR + File.separator;
        int numOfExperiments = 0;
        double maxLikelihood = 0;
        String maxLikelihoodDir = "";
        System.out.println("I will perform " + beta.size() * nTopics.size() + " experiments...");
        for (int i = 0; i < nTopics.size(); i++) {
            for (int j = 0; j < beta.size(); j++) {
                numOfExperiments++;
                String experimentDir = experimentsRoot + numOfExperiments + File.separator;
                writeLinesOrLog(new File(experimentDir + TRAIN_FILE), trainingSet);
                writeLinesOrLog(new File(experimentDir + TEST_FILE), testSet);

                LDAcall ld = new LDAcall();
                // The boolean flag is true for the training file and false for the test file
                // (presumably estimation vs. inference - confirm against LDAcall).
                ld.call(nTopics.get(i), alpha.get(i), beta.get(j), niters, top_words,
                        experimentDir, true, TRAIN_FILE);
                ld.call(nTopics.get(i), alpha.get(i), beta.get(j), niters, top_words,
                        experimentDir, false, TEST_FILE);
                double likelihood = met.getLogLikelihood(experimentDir, TEST_FILE);
                logLikelihoods.add(String.valueOf(likelihood) + "," + String.valueOf(nTopics.get(i)) + ","
                        + String.valueOf(alpha.get(i)) + "," + String.valueOf(beta.get(j)));

                // The first experiment always becomes the current best; later ones must
                // strictly improve on it.
                if (numOfExperiments == 1 || likelihood > maxLikelihood) {
                    maxLikelihood = likelihood;
                    maxLikelihoodDir = experimentDir;
                    bestNoTopics = nTopics.get(i);
                    bestBeta = beta.get(j);
                }
                System.out.println("loglikelihood = " + likelihood);
            }
        }

        writeLinesOrLog(new File(experimentsRoot + "logLikelihoods.txt"), logLikelihoods);
        System.out.println(
                "The best model has loglikelihood = " + maxLikelihood + " and can be found in " + maxLikelihoodDir);
        return maxLikelihoodDir;
    }

    /**
     * Returns the number of topics of the best model found so far.
     *
     * @return the topic count selected by the last call to {@link #performLDAevaluation()},
     *         or 0 if no experiment has been run yet
     */
    public int getBestNoTopics() {
        return bestNoTopics;
    }

    /** Writes the lines to the target file, logging (not propagating) any I/O failure. */
    private static void writeLinesOrLog(File target, List<String> lines) {
        try {
            FileUtils.writeLines(target, lines);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Could not write " + target, ex);
        }
    }
}