Java tutorial
/* * Copyright 2015 Konstantinos Papangelou. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.mythesis.profileanalysis; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.io.FileUtils; /** * Apply LDA with different parameters and get the best model * @author Konstantinos Papangelou */ public class Evaluate { private String outputDirectory; private List<Integer> nTopics; private List<Double> alpha; private List<Double> beta; int niters; int top_words; List<String> trainingSet; List<String> testSet; List<String> logLikelihoods; int bestNoTopics; double bestBeta; public Evaluate(String outputDirectory, List<Integer> nTopics, List<Double> beta, int niters, int top_words, String LDAdir, double testSplit) { this.outputDirectory = outputDirectory; this.nTopics = new ArrayList<>(); this.alpha = new ArrayList<>(); this.beta = new ArrayList<>(); this.nTopics = nTopics; for (Integer nTopic : nTopics) { this.alpha.add((double) (50 / nTopic)); } this.beta = beta; this.niters = niters; this.top_words = top_words; logLikelihoods = new ArrayList<>(); trainingSet = new ArrayList<>(); testSet = new ArrayList<>(); File ldaFile = new File(LDAdir); List<String> pages = new ArrayList<>(); try { pages = FileUtils.readLines(ldaFile); } catch (IOException ex) { Logger.getLogger(Evaluate.class.getName()).log(Level.SEVERE, null, ex); } pages.remove(0); //shuffle pages Collections.shuffle(pages); int testSetSize = (int) (pages.size() * testSplit); int trainingSetSize = pages.size() - testSetSize; int counter = 0; Random rn = new Random(); testSet.add(String.valueOf(testSetSize)); trainingSet.add(String.valueOf(trainingSetSize)); while (counter < testSetSize) { int index = rn.nextInt(pages.size()); if (!testSet.contains(pages.get(index))) { testSet.add(pages.get(index)); pages.remove(index); counter++; } } trainingSet.addAll(pages); } /** * a method tha performs LDA with different parameters and finds the best model using log likelihood estimation * @return the directory that contains the parameters of the best model */ public String performLDAevaluation() { Metrics met = new Metrics(); int numOfExperiments = 0; double maxLikelihood = 0; String maxLikelihoodDir = ""; System.out.println("I will perform " + beta.size() * nTopics.size() + " experiments..."); for (int i = 0; i < nTopics.size(); i++) { for (int j = 0; j < beta.size(); j++) { numOfExperiments++; File trainingFile = new File( outputDirectory + "experiments\\" + numOfExperiments + "\\LDAtrain.txt"); File testFile = new File(outputDirectory + "experiments\\" + numOfExperiments + "\\LDAtest.txt"); try { FileUtils.writeLines(trainingFile, trainingSet); FileUtils.writeLines(testFile, testSet); } catch (IOException ex) { Logger.getLogger(Evaluate.class.getName()).log(Level.SEVERE, null, ex); } LDAcall ld = new LDAcall(); ld.call(nTopics.get(i), alpha.get(i), beta.get(j), niters, top_words, outputDirectory + "experiments\\" + numOfExperiments + "\\", true, "LDAtrain.txt"); ld.call(nTopics.get(i), alpha.get(i), beta.get(j), niters, top_words, outputDirectory + "experiments\\" + numOfExperiments + "\\", false, "LDAtest.txt"); double likelihood = met.getLogLikelihood( outputDirectory + "experiments\\" + numOfExperiments + "\\", "LDAtest.txt"); logLikelihoods.add(String.valueOf(likelihood) + "," + String.valueOf(nTopics.get(i)) + "," + String.valueOf(alpha.get(i)) + "," + String.valueOf(beta.get(j))); if (numOfExperiments == 1) { maxLikelihood = likelihood; maxLikelihoodDir = outputDirectory + "experiments\\" + numOfExperiments + "\\"; bestNoTopics = nTopics.get(i); bestBeta = beta.get(j); } else if (likelihood > maxLikelihood) { maxLikelihood = likelihood; maxLikelihoodDir = outputDirectory + "experiments\\" + numOfExperiments + "\\"; bestNoTopics = nTopics.get(i); bestBeta = beta.get(j); } System.out.println("loglikelihood = " + likelihood); } } File likelihoodsFile = new File(outputDirectory + "experiments\\logLikelihoods.txt"); try { FileUtils.writeLines(likelihoodsFile, logLikelihoods); } catch (IOException ex) { Logger.getLogger(Evaluate.class.getName()).log(Level.SEVERE, null, ex); } System.out.println( "The best model has loglikelihood = " + maxLikelihood + " and can be found in " + maxLikelihoodDir); return maxLikelihoodDir; } public int getBestNoTopics() { return bestNoTopics; } }