Java tutorial
package citation_prediction; import java.awt.Color; import java.awt.Font; import java.awt.Rectangle; import java.awt.Robot; import java.awt.image.BufferedImage; import java.io.*; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map.Entry; import java.util.Scanner; import java.lang.Math; import javax.imageio.ImageIO; import javax.swing.JFrame; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; import org.apache.commons.math3.distribution.NormalDistribution; import org.apache.commons.math3.linear.MatrixUtils; import org.apache.commons.math3.linear.RealMatrix; import org.apache.commons.math3.linear.LUDecomposition; import org.apache.commons.math3.linear.DecompositionSolver; import org.math.plot.Plot2DPanel; import org.math.plot.plotObjects.BaseLabel; /************************ * For additional information about this source code please visit:<br> * http://josiahneuberger.github.io/citation_prediction/ *<br> * Collaborative Research Project between University of Mary Washington and * the Dahlgren Naval Surface Warfare Center. *<br> * Dahlgren Naval Surface Warfare Center<br> * 1. Dr. Jeff Solka <br> * 2. Dr. Allen D. Parks <br> * 3. Computer Scientist Kristen Ash<br> * * * University of Mary Washington Faculty: <br> * 1. Dr. Melody Denhere (Math) Supervisor Faculty <br> * 2. Dr. Debra Hydorn (Math)<br> * 3. Dr. Stephen Davies (Computer Science)<br> * * Student Researchers:<br> * 1. William Etcho (Math)<br> * 2. Josiah Neuberger (Computer Science)<br> * * * *<p> * Brief Description:<br> * This project will explore the concept of predicting future citations based * on a citation history of a given paper. The project relies on the initial * research of Wang, Song, and Barabasi, which is outlined in their paper titled: * "Quantifying Long-Term Scientific Impact" published in "Science 342, 127 (2013)". *<br> * The prediction is based on three different qualities: * 1. Preferred attachment * 2. Age Decay * 3. Fitness *<p> * These three qualities are represented across three variables (The WSB Triple: Wang-Song-Barabsi) *<br> * 1. (mu) - "indicates immediacy, governing the time for a paper to reach its citation peak (Science pg 128)" * 2. (sigma) - "is longevity, capturing the decay rate (Science pg 128)" * 3. (lambda) - relative fitness, which is related to '' the paper's fitness which "captures * the inherent differences between papers, accounting for the perceived novelty and * importance of a discovery. (Science pg 128-129)" *<p> * This Java program will solve the WSB triple by using the general math developed by Dr. Parks to implement * the Newton-Raphson method of numeric analysis applied to the three non-linear equations found in the WSB * paper. The WSB Triple along with equation #3 on page 129 of the WSB paper will be used to predict future * citations of a paper based on a segment of the paper's citation history. *<p> * This class makes use of several packages of the Apache Commons Library (Apache License, V2.0): http://commons.apache.org/ *<br> * Specifically:<br> * Math.Linear (RealMatrix, LUDecomposition, DecompositionSolver, MatrixUtils) * Math.Distribution (NormalDistributation: CDF and PDF functions) * CSV.CSVRecord * CSV.CSVParser * Note: the commons.csv package is part of the Sandbox package and therefore is considered in beta with no official releases. * this class uses the most recent build to extract data from a tabbed delimited csv file. *<br> * The graphs are all done using the jmathplot library (BSD 3-clause License), which can be found at:<br> * http://code.google.com/p/jmathplot/ * * <br><br> * License:<br> * <br> * Copyright 2014 Josiah Neuberger & William Etcho<br> * Licensed under the Apache License, Version 2.0 (the "License");<br> * you may not use this file except in compliance with the License.<br> * You may obtain a copy of the License at<br> *<br> * http://www.apache.org/licenses/LICENSE-2.0<br> *<br> * Unless required by applicable law or agreed to in writing, software<br> * distributed under the License is distributed on an "AS IS" BASIS,<br> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<br> * See the License for the specific language governing permissions and<br> * limitations under the License.<br> */ public class CitationCore { private NormalDistribution nd = new NormalDistribution(); public static void main(String[] args) throws IOException { CitationCore.CitationCoreTest cct = new CitationCore().new CitationCoreTest(new Scanner(System.in)); cct.run_user_tests(); } /** * This is a basic test class that allows for the creation of various test, which can be * added to the 'tests' variable structure for presentation to the user during execution. * * * */ public class CitationCoreTest { public LinkedHashMap<Integer, TestCase> tests; private Scanner s; CitationCoreTest(Scanner s) { tests = new LinkedHashMap<Integer, TestCase>(); s = new Scanner(System.in); this.s = s; int testIndex = 0; //Add test here //uncomment this code so you can run test on many versions of 'm' one after the other. /*tests.put(++testIndex, new TestCase_AutoSearchData(s,"",1,2.9)); tests.put(++testIndex, new TestCase_AutoSearchData(s,"",1,20)); tests.put(++testIndex, new TestCase_AutoSearchData(s,"",1,30)); tests.put(++testIndex, new TestCase_AutoSearchData(s,"",1,50)); tests.put(++testIndex, new TestCase_AutoSearchData(s,"",1,70)); tests.put(++testIndex, new TestCase_AutoSearchData(s,"",1,100));*/ double m = 30; tests.put(++testIndex, new TestCase_AutoSearchData(s, "", 1, m)); tests.put(++testIndex, new TestCase_AutoSearchData(s, "(all papers with NO pause) ::", 1, m, true, false)); tests.put(++testIndex, new TestCase_AutoSearchData(s, "(all papers with pause) ::", 1, m, true, true)); } /** * Print a menu for a user to selet tests to run. */ public void run_user_tests() { boolean quit = false; int test_number = 0; do { System.out.println("***************CitationCore Menu of Test Cases********************"); System.out.println("0: Quit"); for (Entry<Integer, TestCase> e : this.tests.entrySet()) { System.out.println(e.getKey() + ": " + e.getValue().name + " Test "); } System.out.println("******************************************************************"); System.out.println("Please choose a menu item: "); test_number = s.nextInt(); if (test_number != 0) { if (tests.containsKey(test_number)) { this.tests.get(test_number).run_test(); } else { System.err.println("ERROR: The number you inputed does not match a menu item."); } } } while (test_number != 0); s.close(); } /** * Override this class to create a new testCase. * You must implement the function run_test. * * @author Josiah Neuberger */ public abstract class TestCase { public String name; public String type; private CitationCore cc; private double[][] data; static final String TYPE_USER_DRIVEN = "user driven"; static final String TYPE_STAND_ALONE = "stand alone"; TestCase(String name, String type) { this.name = name; this.type = type; this.cc = new CitationCore(); } /** * This function is where your test code goes. * @return Should return true if test passed and false otherwise. */ public abstract boolean run_test(); } /** * This class implements a test case which will allow a user to select paper(s) from * a file to run the NRC Search algorithm on. The test will attempt to find WSB solutions * for 5 year, 10 year, and all years of citation data available. A graph will be * presented to the user and saved for later reference. * * * @author Josiah Neuberger * */ private class TestCase_AutoSearchData extends TestCase { Scanner s; double step; double m; boolean runAllPapers; boolean pauseBetweenPapers; TestCase_AutoSearchData(Scanner s, String prependToTitle, double step, double m) { super(prependToTitle + " NRC Search with step=" + step + ", m=" + m, TestCase.TYPE_USER_DRIVEN); this.runAllPapers = false; this.pauseBetweenPapers = true; this.step = step; this.m = m; this.s = s; } TestCase_AutoSearchData(Scanner s, String prependToTitle, double step, double m, boolean runAllPapers, boolean pauseBetweenPapers) { super(prependToTitle + " NRC Search with step=" + step + ", m=" + m, TestCase.TYPE_USER_DRIVEN); this.runAllPapers = runAllPapers; this.pauseBetweenPapers = pauseBetweenPapers; this.step = step; this.m = m; this.s = s; } public boolean run_test() { String solutionString = ""; String dirname = "papers/"; String nl = System.getProperty("line.separator"); File fdir = new File(dirname); ArrayList<String> filenames = new ArrayList<String>(Arrays.asList(fdir.list())); //Get available files to parse for paper's citation history int i = 0; for (String name : filenames) { System.out.println(i + ": " + name); i++; } System.out.println("Please choose a file to parse for papers: "); int filenumber_input = s.nextInt(); assert ((filenumber_input >= 0) && (filenumber_input < filenames.size())); List<CSVRecord> papersfromfile = CitationCore.getCSVData(dirname + filenames.get(filenumber_input), CSVFormat.DEFAULT, false); List<CSVRecord> papers; int pn; //Either run all the papers in the selected file or // let the user choose a paper. if (!runAllPapers) { System.out.println("Please choose a paper #: "); int papernumber_input = s.nextInt(); papers = papersfromfile.subList(papernumber_input - 1, papernumber_input); pn = papernumber_input - 1; } else { papers = papersfromfile; pn = 0; } //process papers for WSB solutions and present graphs for (int p = 0; p < papers.size(); p++) { pn++; //The actual number of the paper from the file //Extract citation history and reformat for NRM for this paper String paper = "Filename: " + dirname + filenames.get(filenumber_input) + " (papers indexed as 1,2,3....)"; double[][] data5 = CitationCore.fixData(papers.get(p), 5); double[][] data10 = CitationCore.fixData(papers.get(p), 10); double[][] dataAll = CitationCore.fixData(papers.get(p), 0); //Find WSB solutions ArrayList<LinkedHashMap<String, Double>> solutions5 = CitationCore .newtonRaphson_ConvergenceTest(data5, step, m); ArrayList<LinkedHashMap<String, Double>> solutions10 = CitationCore .newtonRaphson_ConvergenceTest(data10, step, m); ArrayList<LinkedHashMap<String, Double>> solutionsAllData = CitationCore .newtonRaphson_ConvergenceTest(dataAll, step, m); solutionString += "P#" + pn + "(train=5):: " + solutions5.toString() + nl; solutionString += "P#" + pn + "(train=10):: " + solutions10.toString() + nl; solutionString += "P#" + pn + "(train=All):: " + solutionsAllData.toString() + nl; System.out.println("************************P#" + pn + " Solutions**************************"); System.out.print("Solutions (5 years of training): " + solutions5.toString() + "\nSolutions (10 years of training):" + solutions10.toString() + "\nSolutions (all years of training): " + solutionsAllData.toString() + "\n"); JFrame f = new JFrame(); Plot2DPanel plot = super.cc.graphWSB(dataAll, m, null, "m=" + m + ", " + paper, "p#" + pn + ": (train=All)--->" + solutionsAllData.toString(), solutionsAllData, false, f); super.cc.graphWSB(dataAll, m, plot, "m=" + m + ", " + paper, "p#" + pn + ": (train=10)--->" + solutions10.toString(), solutions10, false, f); super.cc.graphWSB(dataAll, m, plot, "m=" + m + ", " + paper, "p#" + pn + ": (train=5)--->" + solutions5.toString(), solutions5, true, f); //Get screenshot to save graph for later reference Rectangle screenRect = new Rectangle(0, 0, 1000, 800); try { System.err.println("WARNING: TAKING SCREEN CAPTURE. DON'T TOUCH ANYTHING"); Thread.sleep(4000); BufferedImage plot_screenshot = new Robot().createScreenCapture(screenRect); System.err.println("Thanks, we're done now."); File dir = new File("saved_plots/" + filenames.get(filenumber_input)); if (!dir.exists()) dir.mkdir(); String fn = filenames.get(filenumber_input); String fnid = fn.substring(0, fn.indexOf("_")); ImageIO.write(plot_screenshot, "jpg", new File(dir + "/" + fnid + "_" + pn + "_m" + m + ".jpg")); //plot.toGraphicFile(new File(dir +"/" + pn + ".png")); if (!pauseBetweenPapers) { f.dispose(); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } if (pauseBetweenPapers) { System.out.println("Please press enter to move onto the next paper."); s.nextLine(); } } try { //save the graph File dir = new File("saved_plots/" + filenames.get(filenumber_input)); if (!dir.exists()) dir.mkdir(); FileWriter bw = new FileWriter(dir + "/solutions.txt"); bw.write(solutionString); bw.close(); } catch (Exception e) { e.printStackTrace(); } return true; //User driven so this test always returns true. } } } /** * This function will take a list of WSB solutions and graph them. You can add solutions to the graph by saving * the return of the function the first time and passing it back in on the next call as the 'plot'. * * * @param data_in_days The citation data distributed mostly even in days (call fixdata function if data is in years). * @param m The average number of new references in each new paper for a journal. * @param plot The plot you would like the WSB solution plotted to. * @param graphTitle The title to display on the JPanel. * @param lineLegend The title of this curve. * @param wsbSolutions The list of WSB solutions to graph. * @param showGraph Display the graph to the user (generally called once all curves have been added.) * @param frame The frame to display the graph in. * @return A Panel containing the graph. */ public Plot2DPanel graphWSB(double[][] data_in_days, double m, Plot2DPanel plot, String graphTitle, String lineLegend, ArrayList<LinkedHashMap<String, Double>> wsbSolutions, boolean showGraph, JFrame frame) { //source of library: http://code.google.com/p/jmathplot/ int plotLength = data_in_days.length + (365 * 3); double[][] data_in_years = new double[plotLength][2]; //Translate the data into years instead of days for (int i = 0; i < data_in_days.length; i++) { data_in_years[i][0] = data_in_days[i][0] / 365; data_in_years[i][1] = data_in_days[i][1]; } for (int i = data_in_days.length; i < plotLength; i++) { data_in_years[i][0] = data_in_years[i - 1][0] + .025; data_in_years[i][1] = data_in_years[data_in_days.length - 1][1]; } if (plot == null) { plot = new Plot2DPanel(); plot.addScatterPlot("Actual Citations", data_in_years); //Plot the data } //Extract the timevalue column RealMatrix mdata = MatrixUtils.createRealMatrix(data_in_years); double[] tvalues = mdata.getColumn(0); double[] cvalues = new double[plotLength]; for (LinkedHashMap<String, Double> s : wsbSolutions) { //calculate their fitted y values for (int i = 0; i < plotLength; i++) { cvalues[i] = m * (Math.exp( s.get("lambda") * pnorm((Math.log(365 * tvalues[i]) - s.get("mu")) / s.get("sigma"))) - 1); } //Calculate the Ultimate Impact double c_impact = m * (Math.exp(s.get("lambda")) - 1); //plot the fit plot.addLinePlot("Ultimate Impact=" + c_impact + " :: " + lineLegend, tvalues, cvalues); } // put the PlotPanel in a JFrame, as a JPanel if (showGraph) { plot.setAxisLabel(0, "Time in Years"); plot.setAxisLabel(1, "Cumulative Citations"); //Uncomment these if you wish to have fixed axis. //plot.setFixedBounds(1, 0, 500); //plot.setFixedBounds(0, 0, 60); // Add the file Title BaseLabel filetitle = new BaseLabel(graphTitle, Color.GRAY, 0.5, 1.1); filetitle.setFont(new Font("Courier", Font.BOLD, 13)); plot.addPlotable(filetitle); plot.addLegend("SOUTH"); //JFrame frame = new JFrame(graphTitle); frame.setTitle(graphTitle); frame.setContentPane(plot); frame.setBounds(0, 0, 1000, 800); frame.setVisible(true); frame.repaint(); } return plot; } /** * This function calculates a bunch of values needed for each iteration of the Newton-Raphson numerical method for solving for a WSB solution. * * @param data The citation data in days. * @param mu The current iteration's mu value. * @param sigma The current iteration's sigma value. * @param m The constant value, which is determined by the average number of references in each new paper for a journal. * @param t The last time value in the citation history. * @param n The total number of citations for this paper. * @param iteration The iteration number. * @return A list containing all the calculated values for the Netwon-Raphson method for this iteration. */ private LinkedHashMap<String, Double> getIterationData(double[][] data, double mu, double sigma, double m, double t, double n, int iteration) { //Set variables to zero for each iteration that this function is called. double xt = 0; double xi = 0; double ti = 0; double mhat = m / n; double s_ln_ti = 0; double s_ln_ti_sqrd = 0; double s_xi = 0; double s_xi_sqrd = 0; double s_pnorm_xi = 0; double s_dnorm_xi = 0; double s_xi_dnorm_xi = 0; double s_xi_sqrd_dnorm_xi = 0; double s_xi_cubed_dnorm_xi = 0; //Find X of T xt = (Math.log(t) - mu) / sigma; //Sum the various values needed. for (int i = 0; i < n; i++) { ti = data[i][0]; xi = (Math.log(ti) - mu) / sigma; s_ln_ti += Math.log(ti); s_ln_ti_sqrd += Math.pow(Math.log(ti), 2); s_xi += xi; s_xi_sqrd += Math.pow(xi, 2); s_pnorm_xi += pnorm(xi); s_dnorm_xi += dnorm(xi); s_xi_dnorm_xi += xi * dnorm(xi); s_xi_sqrd_dnorm_xi += Math.pow(xi, 2) * dnorm(xi); s_xi_cubed_dnorm_xi += Math.pow(xi, 3) * dnorm(xi); } //Divide by the total number of citations to get the expected values. s_ln_ti /= n; s_ln_ti_sqrd /= n; s_xi /= n; s_xi_sqrd /= n; s_pnorm_xi /= n; s_dnorm_xi /= n; s_xi_dnorm_xi /= n; s_xi_sqrd_dnorm_xi /= n; s_xi_cubed_dnorm_xi /= n; LinkedHashMap<String, Double> r_list = new LinkedHashMap<String, Double>(); //Add them to our list. r_list.put("iteration", (double) iteration); r_list.put("t", t); r_list.put("xt", xt); r_list.put("n", n); r_list.put("m", m); r_list.put("mhat", mhat); r_list.put("mu", mu); r_list.put("sigma", sigma); r_list.put("s_ln_ti", s_ln_ti); r_list.put("s_ln_ti_sqrd", s_ln_ti_sqrd); r_list.put("s_xi", s_xi); r_list.put("s_xi_sqrd", s_xi_sqrd); r_list.put("s_pnorm_xi", s_pnorm_xi); r_list.put("s_dnorm_xi", s_dnorm_xi); r_list.put("s_xi_dnorm_xi", s_xi_dnorm_xi); r_list.put("s_xi_sqrd_dnorm_xi", s_xi_sqrd_dnorm_xi); r_list.put("s_xi_cubed_dnorm_xi", s_xi_cubed_dnorm_xi); return r_list; } /** * This function calculates a bunch of values needed for each iteration of the Newton-Raphson numerical method for solving for a WSB solution. * <br> * Uses some default values: * <br> * t - extracted from the citation history in 'data'<br> * n - extracted from the citation history in 'data'<br> * * @param data The citation data in days. * @param mu The current iteration's mu value. * @param sigma The current iteration's sigma value. * @param m A constant value, which is determined by the average number of references in each new paper for a journal. * @return A list containing all the calculated values for the Netwon-Raphson method for this iteration. */ private LinkedHashMap<String, Double> getIterationData(double[][] data, double mu, double sigma, double m) { return getIterationData(data, mu, sigma, m, data[data.length - 1][0], data[data.length - 1][1], 0); } /** * This function calculates the essential formulas for the Newton-Raphson function. The formulas used here * were based on the math of Dr. Allen Parks. * * @param l A list structure that will hold the calculated values. * @return The list updated with the calculated values of the essential formulas. */ private LinkedHashMap<String, Double> getPartialsData(LinkedHashMap<String, Double> l) { double fn = (((1 + l.get("mhat")) * pnorm(l.get("xt")) - l.get("s_pnorm_xi")) * l.get("s_xi")) - l.get("s_dnorm_xi") + (1 + l.get("mhat")) * dnorm(l.get("xt")); l.put("fn", fn); double gn = (((1 + l.get("mhat")) * pnorm(l.get("xt")) - l.get("s_pnorm_xi")) * (l.get("s_xi_sqrd") - 1)) - l.get("s_xi_dnorm_xi") + ((1 + l.get("mhat")) * l.get("xt") * dnorm(l.get("xt"))); l.put("gn", gn); double df_dmu = (((1 + l.get("mhat")) * ((l.get("xt") - l.get("s_xi")) * dnorm(l.get("xt")) - pnorm(l.get("xt")))) + l.get("s_xi") * l.get("s_dnorm_xi") - l.get("s_xi_dnorm_xi") + l.get("s_pnorm_xi")) / l.get("sigma"); l.put("df_dmu", df_dmu); double df_dsigma = (((1 + l.get("mhat")) * ((l.get("xt") - l.get("s_xi")) * l.get("xt") * dnorm(l.get("xt")) - l.get("s_xi") * pnorm(l.get("xt")))) + l.get("s_xi") * (l.get("s_xi_dnorm_xi") + l.get("s_pnorm_xi")) - l.get("s_xi_sqrd_dnorm_xi")) / l.get("sigma"); l.put("df_dsigma", df_dsigma); double dg_dmu = (((1 + l.get("mhat")) * (2 * l.get("s_xi") * pnorm(l.get("xt")) + (l.get("s_xi_sqrd") - Math.pow(l.get("xt"), 2)) * dnorm(l.get("xt")))) - (2 * l.get("s_xi") * l.get("s_pnorm_xi") + l.get("s_xi_sqrd") * l.get("s_dnorm_xi") - l.get("s_xi_sqrd_dnorm_xi"))) / (-l.get("sigma")); l.put("dg_dmu", dg_dmu); double dg_dsigma = (((1 + l.get("mhat")) * ((Math.pow(l.get("xt"), 3)) * dnorm(l.get("xt")) - l.get("s_xi_sqrd") * l.get("xt") * dnorm(l.get("xt")) - 2 * l.get("s_xi_sqrd") * pnorm(l.get("xt")))) + 2 * l.get("s_xi_sqrd") * l.get("s_pnorm_xi") + l.get("s_xi_sqrd") * l.get("s_xi_dnorm_xi") - l.get("s_xi_cubed_dnorm_xi")) / l.get("sigma"); l.put("dg_dsigma", dg_dsigma); return l; } /** * This function will print a list containing the calculated values for the Newton-Raphson method in * a formated way that is easy to read. * * @param l The list containing the Newton-Raphon calculated values. */ public void printList(LinkedHashMap<String, Double> l) { for (Entry<String, Double> e : l.entrySet()) { System.out.println(e.getKey() + "=" + e.getValue()); } } /** * This function implements the algorithm designed by Josiah Neuberger and William Etcho used to solve for a WSB solution. * The general math for the Newton-Raphson method was provided by Dr. Allen Parks and can be found in the function 'getPartialsData'. * <br><br> * This algorithm was created under the direction of Supervising University of Mary Washington faculty, Dr. Melody Denhere along with * consultation from Dr. Jeff Solka and Computer Scientists Kristin Ash with the Dahlgren Naval Surface Warfare Center. * <br><br> * If your interested in more information about the research behind this algorithm please visit:<br> * http://josiahneuberger.github.io/citation_prediction/ * <br><br> * * @param data The citation data in days. * @param mu The initial mu guess to use in the Newton-Raphson method. * @param sigma The initial sigma guess to use in the Newton-Raphson method. * @param m The constant value, which is determined by the average number of references in each new paper for a journal. * @param t The last time value in the paper's citation history. * @param n The total number of citations for this paper. * @param l A list structure to store values for each iteration (should be null to start). * @param iteration The iteration (should be zero to start) * @param max_iteration The maximum number of iterations to try before stopping. * @param tolerance The tolerance level, which determines that a solution has been converged on. * @return A list containing the WSB solution of (lambda, mu, sigma, iterations). */ private LinkedHashMap<String, Double> newtonRaphson(double[][] data, double mu, double sigma, double m, double t, double n, LinkedHashMap<String, Double> l, int iteration, int max_iteration, double tolerance) { double lambda; LinkedHashMap<String, Double> r = new LinkedHashMap<String, Double>(); if (iteration > max_iteration) { System.out.println("Does not converge."); r.put("lambda", null); r.put("mu", null); r.put("sigma", null); r.put("iterations", null); return r; } else if (tolerance < 0.00000001) { System.out.println("Stopped due to tolerance."); r.put("lambda", getLambda(data, mu, sigma, m, t, n)); r.put("mu", mu); r.put("sigma", sigma); r.put("iterations", (double) iteration); return r; } else { l = getPartialsData(getIterationData(data, mu, sigma, m)); double[] array_xn = { mu, sigma }; double[] array_yn = { l.get("fn"), l.get("gn") }; RealMatrix xn = MatrixUtils.createColumnRealMatrix(array_xn); RealMatrix yn = MatrixUtils.createColumnRealMatrix(array_yn); //http://commons.apache.org/proper/commons-math/userguide/linear.html double[][] array_jacobian = { { l.get("df_dmu"), l.get("df_dsigma") }, { l.get("dg_dmu"), l.get("dg_dsigma") } }; RealMatrix jacobian = MatrixUtils.createRealMatrix(array_jacobian); LUDecomposition lud = new LUDecomposition(jacobian); DecompositionSolver decS = lud.getSolver(); l.put("iteration", (double) (iteration + 1)); //DEBUG: printList(l); if (!decS.isNonSingular()) { l.put("iteration", (double) (max_iteration + 1)); System.err.println("ERROR: Jacobian matrix was singular."); } else { RealMatrix solution = xn.subtract(decS.getInverse().multiply(yn)); RealMatrix xndiff = solution.subtract(xn); tolerance = Math.sqrt(Math.pow(xndiff.getEntry(0, 0), 2) + Math.pow(xndiff.getEntry(1, 0), 2)); //update values l.put("mu", solution.getEntry(0, 0)); l.put("sigma", solution.getEntry(1, 0)); //DEBUG: System.out.printf("\"%-20s=%25f\"\n", "NEW MU", l.get("mu")); //DEBUG: System.out.printf("\"%-20s=%25f\"\n", "NEW SIGMA", l.get("sigma")); } //DEBUG: System.out.println("****************************************"); return newtonRaphson(data, l.get("mu"), l.get("sigma"), m, l, (l.get("iteration").intValue()), tolerance); } } private LinkedHashMap<String, Double> newtonRaphson(double[][] data, double mu, double sigma, double m, LinkedHashMap<String, Double> l, int iteration, double tolerance) { return newtonRaphson(data, mu, sigma, m, data[data.length - 1][0], data[data.length - 1][1], l, iteration, 31, tolerance); } /** * This function implements the algorithm designed by Josiah Neuberger and William Etcho used to solve for a WSB solution. * The general math for the Newton-Raphson method was provided by Dr. Allen Parks and can be found in the function 'getPartialsData'. * <br><br> * This algorithm was created under the direction of Supervising University of Mary Washington faculty, Dr. Melody Denhere along with * consultation from Dr. Jeff Solka and Computer Scientists Kristin Ash with the Dahlgren Naval Surface Warfare Center. * <br><br> * If your interested in more information about the research behind this algorithm please visit:<br> * http://josiahneuberger.github.io/citation_prediction/ * <br><br> * This function uses the following default values:<br> * Max Iterations = 31<br> * * @param data The citation data in days. * @param mu The initial mu guess to use in the Newton-Raphson method. * @param sigma The initial sigma guess to use in the Newton-Raphson method. * @param m The constant value, which is determined by the average number of references in each new paper for a journal. * @return A list containing the WSB solution of (lambda, mu, sigma, iterations). */ public LinkedHashMap<String, Double> newtonRaphson(double[][] data, double mu, double sigma, double m) { return newtonRaphson(data, mu, sigma, m, data[data.length - 1][0], data[data.length - 1][1], null, 0, 31, .1); } /** * This function runs the Newton-Raphson function on an interval from .1 to 10 returning a list of * all the unique solutions. * * @param data The citation data in days. * @param step The step you would like to use to step through the interval of .1 to 10. * @param m The average number of new references contained in each paper for a journal. * @return A list containing the WSB solutions. */ public static ArrayList<LinkedHashMap<String, Double>> newtonRaphson_ConvergenceTest(double[][] data, double step, double m) { return newtonRaphson_ConvergenceTest(data, .1, 10, 10, step, m, false); } /** * This function runs the Newton-Raphson function on an interval from .1 to 10 returning a list of * all the unique solutions. * * @param data The citation data in days. * @param step The step you would like to use to step through the interval of .1 to 10. * @param m The average number of new references contained in each paper for a journal. * @return A list of list containing the WSB solutions. */ private static ArrayList<LinkedHashMap<String, Double>> newtonRaphson_ConvergenceTest(double[][] data, double start, double mu_guess, double sigma_guess, double step, double m, boolean wasAlreadyRun) { CitationCore cc = new CitationCore(); String[] matrix_headers = { "mu0", "sigma0", "lambda", "mu", "sigma", "iteration" }; ArrayList<ArrayList<Double>> matrix = new ArrayList<ArrayList<Double>>(100); ArrayList<LinkedHashMap<String, Double>> solutions = new ArrayList<LinkedHashMap<String, Double>>(100); ArrayList<Double> lambdas = new ArrayList<Double>(); for (double mu0 = start; mu0 < (mu_guess + 2); mu0 += step) { for (double sigma0 = start; sigma0 < (sigma_guess + 2); sigma0 += step) { LinkedHashMap<String, Double> answer = cc.newtonRaphson(data, mu0, sigma0, m); if (answer.get("lambda") != null) { ArrayList<Double> row = new ArrayList<Double>(); row.add(mu0); row.add(sigma0); row.add(answer.get("lambda")); row.add(answer.get("mu")); row.add(answer.get("sigma")); row.add(answer.get("iterations")); matrix.add(row); boolean isUnique = true; for (double l : lambdas) { if ((answer.get("lambda") < 0) || Math.abs(l - answer.get("lambda")) < 1e-2) { isUnique = false; break; } } if (isUnique) { LinkedHashMap<String, Double> s = new LinkedHashMap<String, Double>(); s.put("lambda", answer.get("lambda")); s.put("mu", answer.get("mu")); s.put("sigma", answer.get("sigma")); solutions.add(s); } lambdas.add(answer.get("lambda")); } } } printMatrix(matrix, matrix_headers); System.out.println("Unique Solutions:"); System.out.println(solutions.toString()); if (!wasAlreadyRun && solutions.isEmpty()) return newtonRaphson_ConvergenceTest(data, start, mu_guess, sigma_guess, .1, m, true); else return solutions; } /** * Prints the citation history stored in 'l' in a formated way. * * @param l Citation History. * @param header Headers for columns. */ public static void printMatrix(ArrayList<ArrayList<Double>> l, String[] header) { int count = 0; System.out.print(count + ": "); for (Object h : header) { System.out.printf("%-20s", h); } System.out.println(""); for (ArrayList<Double> row : l) { count++; System.out.print(count + ": "); for (double d : row) { System.out.printf("%-20f", d); } System.out.println(""); } } /** * Calculate lambda based on a solution found in the Newton-Raphson method for mu and sigma. * * @param data The citation data in days. * @param mu The solution for mu. * @param sigma The solution for sigma. * @param m The average number of references for new papers for a journal. * @param t The last timestamp for the citation data. * @param n The total number of citations from the citation history. * @return The lambda value based on mu and sigma. */ public double getLambda(double[][] data, double mu, double sigma, double m, double t, double n) { double xt = (Math.log(t) - mu) / sigma; double mhat = m / n; double s_pnorm_xi = 0; for (int i = 0; i < n; i++) { double ti = data[i][0]; double xi = (Math.log(ti) - mu) / sigma; s_pnorm_xi += pnorm(xi); } s_pnorm_xi /= n; return 1 / (((1 + mhat) * pnorm(xt)) - (s_pnorm_xi)); } private double pnorm(double x) { return nd.cumulativeProbability(x); } private double dnorm(double x) { return nd.density(x); } /** * Get CSV citation history from a file. * * @param filename The filename and path containing the citation data. * @param format The format of the file. * @param hasHeader Does the file have a line with headings? * @return A record containing the csv information. */ private static List<CSVRecord> getCSVData(String filename, CSVFormat format, boolean hasHeader) { boolean error = true; List<CSVRecord> list_ourdata = null; try { FileReader ourdata = new FileReader(filename); CSVParser data_parser = new CSVParser(ourdata, format); list_ourdata = data_parser.getRecords(); if (hasHeader) { list_ourdata.remove(0); } //remove header file. Iterator<CSVRecord> list_iterator = list_ourdata.iterator(); for (int rowIndex = 0; rowIndex < list_ourdata.size(); rowIndex++) { CSVRecord record = list_iterator.next(); System.out.println("#" + (rowIndex + 1) + " " + record.toString()); } data_parser.close(); ourdata.close(); error = false; } catch (java.io.FileNotFoundException e) { System.err.println("ERROR: There was an error opening, reading, or parsing the input file."); System.err.println("ERROR:" + filename); error = true; } catch (java.io.IOException e) { System.err.println("ERROR: Could not close the parser or the input file."); error = true; } if (error || list_ourdata == null) { System.exit(1); return null; } else { return list_ourdata; } } /** * Fix the citation data, which is in years by translating the timestamps and citations to be in days. * * @param record The citation history in years. * @param limitToRows Limit the rows being processed. * @return The citation history in days. */ private static double[][] fixData(CSVRecord record, int limitToRows) { double[][] r = null; int citationCount = 0; int numberOfRowsToProcess = 0; Iterator<String> record_iterator = record.iterator(); record_iterator.next(); //move pass paper id record_iterator.next(); //move pass paper publish year if (limitToRows != 0) { numberOfRowsToProcess = limitToRows; r = new double[numberOfRowsToProcess + 1][2]; } else { numberOfRowsToProcess = record.size() - 2; r = new double[record.size() - 2][2]; } for (int rowIndex = 0; record_iterator.hasNext() && rowIndex < numberOfRowsToProcess; rowIndex++) { String citations_forthis_year = record_iterator.next(); r[rowIndex][0] = Double.valueOf(rowIndex); //timestamp r[rowIndex][1] = Double.valueOf(citations_forthis_year); //citation citationCount += r[rowIndex][1]; } return fixData(r, citationCount); } /** * Helper function to fix the citation data, which is in years by translating the timestamps and citations to be in days. * * @param record The citation history in years. * @param limitToRows Limit the rows being processed. * @return The citation history in days. */ private static double[][] fixData(double[][] data, int citationCount) { double r[][] = new double[citationCount][2]; int citations = 0; double timestamp = 0; for (int rowIndex = 0; rowIndex < data.length; rowIndex++) { for (int citationIndex = 0; citationIndex < data[rowIndex][1]; citationIndex++) { citations += 1; timestamp = data[rowIndex][0] + (citationIndex + 1) / data[rowIndex][1]; //We use the citations to index in because this loop is creating a timestamp for each citation entry. r[citations - 1][0] = 365 * timestamp; //convert to days r[citations - 1][1] = citations; } } return r; } /** * Convert citation history to a String object. * * @param data The citation history. * @return A string of the citation history. */ public static String toString(double[][] data) { String r = "["; for (int rowIndex = 0; rowIndex < (data.length - 1); rowIndex++) { r += "(" + (int) (Math.round(data[rowIndex][0])) + "," + data[rowIndex][1] + ")\n"; } //take care of last entry without adding newline r += "(" + (int) (Math.round(data[data.length - 1][0])) + "," + data[data.length - 1][1] + ")]"; return r; } }