no.uio.medicine.virsurveillance.parserTests.SaxTest.java Source code

Java tutorial

Introduction

Here is the source code for no.uio.medicine.virsurveillance.parserTests.SaxTest.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package no.uio.medicine.virsurveillance.parserTests;

import java.io.*;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import no.uio.medicine.virsurveillance.DDBB.SQLManagement;
import no.uio.medicine.virsurveillance.DDBB.SQLQueries;
import no.uio.medicine.virsurveillance.datamodels.PubmedArticle;
import no.uio.medicine.virsurveillance.datamodels.PubmedAuthor;
import no.uio.medicine.virsurveillance.datamodels.QueryResult;
import no.uio.medicine.virsurveillance.parsers.CsvWosParser;
import no.uio.medicine.virsurveillance.parsers.SaxXMLProcess;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
 * Manual driver for the virus-surveillance tooling. It bundles helpers that
 * create the database schema, load a PubMed XML dump through a SAX handler,
 * load journal CSV files, and run reporting queries whose results are charted
 * and saved to disk. {@link #main(String[])} currently runs only
 * {@link #performImpactFactorQueries()}; the other helpers are kept so they can
 * be switched in by editing {@code main}.
 *
 * <p>All input and output locations are hard-coded absolute paths.
 *
 * @author Albert
 */
public class SaxTest {

    private static final Logger LOGGER = Logger.getLogger(SaxTest.class.getName());

    private static final String JCS_CSV_FOLDER = "/Users/apla/Documents/Virus Suirvellance/Data/ThomsonReuters/";

    private static final String JCS_CSV_FILE = "/Users/apla/Documents/Virus Suirvellance/Data/ThomsonReuters/SCI+SCIE2014.csv";
    private static final String JCS_CSV_FILE2 = "/Users/apla/Documents/Virus Suirvellance/Data/ThomsonReuters/SCI+SCIE2013.csv";
    private static final String PUBMED_SAMPLE_XML_FILE_NAME = "/Users/apla/Documents/Virus Suirvellance/Data/bigpubmed.xml";

    /** Batch size handed to the XML handler and to the CSV parser. */
    private static final int BATCH_SIZE = 10000;

    /** Key under which the query results expose the journal names read by this class. */
    private static final String JOURNAL_TITLE_KEY = "abbreviated_title";

    /** First arguments passed to {@code getJournalWithVariableImpactFactors} by the impact-factor report. */
    private static final int[] IMPACT_FACTOR_VARIATIONS = {5, 10, 20};

    //This is intended to test different xml parsing tools to deal with the PubMed database
    //http://stackoverflow.com/questions/2134507/fast-lightweight-xml-parser
    /**
     * Entry point. Runs the impact-factor report and logs any failure at
     * {@code SEVERE} level instead of propagating it.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {

        try {
            //createAndLoadData();
            performImpactFactorQueries();
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }

    }

    /**
     * Connects to the database, creates the schema and closes the connection.
     * Failures are logged, not rethrown.
     */
    private static void createDatabaseSchema() {
        try {
            SQLManagement sqlM = new SQLManagement();
            sqlM.connect2DB();
            try {
                sqlM.createSchema();
            } finally {
                // Close the connection even when schema creation fails.
                sqlM.closeDB();
            }
        } catch (SQLException | ClassNotFoundException | IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Parses {@link #PUBMED_SAMPLE_XML_FILE_NAME} with a SAX parser, feeding the
     * events to a {@link SaxXMLProcess} handler that is given the open database
     * connection and {@link #BATCH_SIZE}. Failures are logged, not rethrown.
     */
    private static void loadXMLDatabase() {
        try {
            SQLManagement sqlM = new SQLManagement();
            sqlM.connect2DB();
            try {
                File inputFile = new File(PUBMED_SAMPLE_XML_FILE_NAME);
                // NOTE(review): the parser uses the factory defaults (no features are
                // set here); acceptable only as long as the input file is trusted.
                SAXParser myXMLParser = SAXParserFactory.newInstance().newSAXParser();
                SaxXMLProcess myHandler = new SaxXMLProcess(BATCH_SIZE, sqlM);
                myXMLParser.parse(inputFile, myHandler);
                //myHandler.printTitles();
            } finally {
                // Close the connection even when parsing fails part-way.
                sqlM.closeDB();
            }
            //System.out.println(sqlM.addAuthor2DB(new PubmedAuthor("Coyote","John")));
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Opens a database connection and lets a {@link CsvWosParser} process the
     * given CSV file. Failures are logged, not rethrown.
     *
     * @param file path of the journal CSV file to parse
     */
    private static void loadCSVJournalFiles(String file) {
        try {
            SQLManagement sqlM = new SQLManagement();
            sqlM.connect2DB();

            CsvWosParser myParser = new CsvWosParser(sqlM, BATCH_SIZE);
            myParser.parse(file);
            // NOTE(review): unlike the other loaders, the connection is not closed
            // here. Confirm whether CsvWosParser.parse closes it; if not, this
            // leaks one connection per call.
        } catch (SQLException | ClassNotFoundException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }

    }

    /**
     * Placeholder for the full "create schema and load everything" run. The
     * whole body is currently commented out, so calling it does nothing.
     */
    private static void createAndLoadData() {
        /*
        createDatabaseSchema();
        long tStart = System.currentTimeMillis();
        loadXMLDatabase();
        long tEnd = System.currentTimeMillis();
        System.out.println("Time use to parse and load the xml: "+((tEnd - tStart)/1000.0));

        //System.out.println("Hit a key to update the CSV journal list");
        //System.in.read();
        //loadCSVJournalFiles(JCS_CSV_FILE);
        //loadCSVJournalFiles(JCS_CSV_FILE2);

        for (int i=1997;i<2015;i++){
        System.out.println(JCS_CSV_FOLDER+"SCI+SCIE"+i+".csv");
        loadCSVJournalFiles(JCS_CSV_FOLDER+"SCI+SCIE"+i+".csv");
        }*/
        //loadCSVJournalFiles(JCS_CSV_FOLDER+"SCI+SCIE2015temp.csv");
        //loadCSVJournalFiles(JCS_CSV_FOLDER+"SCI+SCIE2016temp.csv");
    }

    /**
     * Runs the journal impact-factor report and saves every result under the
     * hard-coded output directory.
     *
     * <p>First the general impact-factor result is charted and saved. Then, for
     * each value -1, 0 and 1 of the second query argument, the
     * variable-impact-factor query is run once per entry of
     * {@link #IMPACT_FACTOR_VARIATIONS}; the distinct journal names of each
     * result are passed to the evolution query. All variation results are saved
     * first, then all evolution results.
     *
     * @throws SQLException           propagated from the query layer
     * @throws ClassNotFoundException propagated from the query layer
     * @throws IOException            propagated from the query layer or from saving
     */
    private static void performImpactFactorQueries() throws SQLException, ClassNotFoundException, IOException {
        String output = "/Users/apla/Documents/Virus Suirvellance/outputs/journalIfacs";
        ensureDirectory(output);

        SQLQueries sqlQuery = new SQLQueries();
        sqlQuery.connect2DB();

        QueryResult result0 = sqlQuery.getJournalImpactFactors();
        result0.getBoxplotChart().updateChartData();
        result0.save2File(output + "/generalIfac");
        System.out.println("General Informationi obtained");

        int runs = IMPACT_FACTOR_VARIATIONS.length;
        for (int i = -1; i < 2; i++) {
            // Label used in chart titles and file names for this value of i.
            String auxStr = "variableIfac";
            if (i == -1) {
                auxStr = "DecreasingIfac";
            } else if (i == 1) {
                auxStr = "IncreasingIFac";
            }

            QueryResult[] variations = new QueryResult[runs];
            QueryResult[] evolutions = new QueryResult[runs];
            for (int k = 0; k < runs; k++) {
                int variation = IMPACT_FACTOR_VARIATIONS[k];

                variations[k] = sqlQuery.getJournalWithVariableImpactFactors(variation, i);
                variations[k].getBoxplotChart().updateChartData();

                ArrayList<String> journals = collectJournalNames(variations[k], Integer.MAX_VALUE);
                evolutions[k] = sqlQuery.getJournalEvolution(journals,
                        auxStr + " evolution of journals with IF var of " + variation);
                evolutions[k].getXyLineChart().updateChartData();
            }

            // Saving happens only after every query of this round has succeeded.
            for (int k = 0; k < runs; k++) {
                variations[k].save2File(output + "/" + auxStr + "IfacVar" + IMPACT_FACTOR_VARIATIONS[k]);
            }
            for (int k = 0; k < runs; k++) {
                evolutions[k].save2File(output + "/" + auxStr + "Evolution_IfacVar" + IMPACT_FACTOR_VARIATIONS[k]);
            }

            System.out.println(auxStr + " PerfomImpactFactorQueries Finished");
        }

    }

    /**
     * Charts the evolution of at most 100 distinct journals returned by the
     * variable-impact-factor query with arguments {@code (15, -1)}. Nothing is
     * saved to disk.
     *
     * @throws SQLException           propagated from the query layer
     * @throws ClassNotFoundException propagated from the query layer
     * @throws IOException            propagated from the query layer
     */
    private static void performJournalEvolutionQueries() throws SQLException, ClassNotFoundException, IOException {
        SQLQueries sqlQuery = new SQLQueries();
        sqlQuery.connect2DB();

        QueryResult resultAux = sqlQuery.getJournalWithVariableImpactFactors(15, -1);
        int max = 100;
        ArrayList<String> journals = collectJournalNames(resultAux, max);

        //journals.add("lancet");
        QueryResult result0 = sqlQuery.getJournalEvolution(journals);
        result0.getXyLineChart().updateChartData();

        //result0.save2File(output+"/generalIfac");
        //System.out.println("PerfomImpactFactorQueries Finished");
    }

    /**
     * For every topic read from the virus CSV file, runs four queries
     * (impact-factor statistics, publication count, self-normalized publication
     * count, relative publication count), prints each result, refreshes its
     * charts and saves it under a per-query output directory.
     *
     * @throws SQLException           propagated from the query layer
     * @throws ClassNotFoundException propagated from the query layer
     * @throws IOException            if the CSV cannot be read or a result cannot be saved
     */
    private static void performVirusQueries() throws SQLException, ClassNotFoundException, IOException {
        // NOTE(review): the other query methods call sqlQuery.connect2DB() before
        // querying; this one does not. Confirm whether that is intentional.
        SQLQueries sqlQuery = new SQLQueries();

        String inputVirusFile = "/Users/apla/Documents/Virus Suirvellance/Data/Table_human_viruses.csv";
        ArrayList<String> viruses = getDataFromVirusCSV(inputVirusFile);

        for (String topic : viruses) {

            // The first result is handled inline: its XY chart is updated without a
            // null check and its stacked chart is not touched.
            QueryResult result0 = sqlQuery.getStatsPerTopicIgnoringIFUnder(topic, 0);
            result0.printResult();
            result0.getXyLineChart().updateChartData();
            result0.save2File("/Users/apla/Documents/Virus Suirvellance/outputs/ifPerYear4copy/" + topic);

            //QueryResult result = sqlQuery.getStatsPerTopicIgnoringIFUnder(topic,0);
            printChartAndSave(sqlQuery.getPublicationCount(topic),
                    "/Users/apla/Documents/Virus Suirvellance/outputs/pubsPerYear4copy/" + topic);

            printChartAndSave(sqlQuery.getPublicationCountSelfNormalized(topic),
                    "/Users/apla/Documents/Virus Suirvellance/outputs/pubsPerYearSN4copy/" + topic);

            printChartAndSave(sqlQuery.getRelativePublicationCount(topic),
                    "/Users/apla/Documents/Virus Suirvellance/outputs/pubsPerYearRel4copy/" + topic);
        }

    }

    /**
     * Reads the {@code "Virus Name"} column of a semicolon-delimited CSV file
     * with a header row and returns one search topic per record.
     *
     * @param inputVirusFile path of the CSV file
     * @return the transformed virus names, in file order
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading or closing the file fails
     */
    private static ArrayList<String> getDataFromVirusCSV(String inputVirusFile)
            throws FileNotFoundException, IOException {
        ArrayList<String> viruses = new ArrayList<>();

        // try-with-resources closes the parser and the underlying reader on every path.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputVirusFile));
                CSVParser parser = CSVFormat.RFC4180.withDelimiter(';').withIgnoreEmptyLines().withHeader().parse(reader)) {
            for (CSVRecord csvRecord : parser) {
                if (csvRecord.isMapped("Virus Name")) {
                    // NOTE(review): replace("", "%%") inserts "%%" before every
                    // character and at the end of the name. This looks like a lost
                    // character in the first argument (possibly a space); confirm
                    // the intended pattern before relying on it.
                    String currentVirus = csvRecord.get("Virus Name").replace(" virus", "").replace("", "%%");
                    viruses.add(currentVirus);
                }
            }
        }

        return viruses;

    }

    /**
     * Best-effort creation of a directory, including missing parents. A failure
     * is logged as a warning and otherwise ignored so the caller can carry on.
     *
     * @param path directory to create
     */
    private static void ensureDirectory(String path) {
        File dir = new File(path);
        try {
            // mkdirs() returns false when the directory already exists, hence the second check.
            if (!dir.mkdirs() && !dir.isDirectory()) {
                LOGGER.log(Level.WARNING, "Could not create output directory: " + path);
            }
        } catch (SecurityException ex) {
            LOGGER.log(Level.WARNING, "Not permitted to create output directory: " + path, ex);
        }
    }

    /**
     * Collects the distinct journal names of a query result in first-seen
     * order. {@code null} entries and the literal text {@code "null"} (any
     * case) are skipped.
     *
     * @param result query result whose {@link #JOURNAL_TITLE_KEY} values are read
     * @param limit  maximum number of names to return
     * @return the distinct names, at most {@code limit} of them
     */
    private static ArrayList<String> collectJournalNames(QueryResult result, int limit) {
        // A LinkedHashSet de-duplicates in constant time while keeping encounter order.
        LinkedHashSet<String> distinct = new LinkedHashSet<>();
        for (String journalName : result.getValues().get(JOURNAL_TITLE_KEY)) {
            if (distinct.size() >= limit) {
                break;
            }
            if (journalName != null && !journalName.equalsIgnoreCase("null")) {
                distinct.add(journalName);
            }
        }
        return new ArrayList<>(distinct);
    }

    /**
     * Prints a query result, refreshes whichever of its XY-line and stacked
     * charts are present, and saves it.
     *
     * <p>The throws clause mirrors the one declared by the caller, because the
     * exact exceptions of the {@link QueryResult} methods are not visible here.
     *
     * @param result     the result to report
     * @param outputPath target passed to {@code save2File}
     */
    private static void printChartAndSave(QueryResult result, String outputPath)
            throws SQLException, ClassNotFoundException, IOException {
        result.printResult();
        if (result.getXyLineChart() != null) {
            result.getXyLineChart().updateChartData();
        }
        if (result.getStackedChart() != null) {
            result.getStackedChart().updateChartData();
        }
        result.save2File(outputPath);
    }

}