at.ac.tuwien.inso.subcat.reporter.Reporter.java Source code

Java tutorial

Introduction

Here is the source code for at.ac.tuwien.inso.subcat.reporter.Reporter.java

Source

/* Reporter.java
 *
 * Copyright (C) 2014 Florian Brosch
 *
 * Based on work from Andreas Mauczka
 *
 * This program is developed as part of the research project
 * "Lexical Repository Analysis" which is part of the PhD thesis
 * "Design and evaluation for identification, mapping and profiling
 * of medium sized software chunks" by Andreas Mauczka at
 * INSO - University of Technology Vienna. For questions in regard
 * to the research project contact andreas.mauczka(at)inso.tuwien.ac.at
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 2.0
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Author:
 *       Florian Brosch <flo.brosch@gmail.com>
 */

package at.ac.tuwien.inso.subcat.reporter;

import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;

import at.ac.tuwien.inso.subcat.config.Configuration;
import at.ac.tuwien.inso.subcat.config.ExporterConfig;
import at.ac.tuwien.inso.subcat.config.Parser;
import at.ac.tuwien.inso.subcat.config.ParserException;
import at.ac.tuwien.inso.subcat.config.SemanticException;
import at.ac.tuwien.inso.subcat.miner.Settings;
import at.ac.tuwien.inso.subcat.model.Model;
import at.ac.tuwien.inso.subcat.model.ModelPool;
import at.ac.tuwien.inso.subcat.model.Project;
import at.ac.tuwien.inso.subcat.model.ResultCallback;
import at.ac.tuwien.inso.subcat.utility.Lemmatizer;

public class Reporter {
    private List<ReportWriter> formatters;
    private Model model;

    public Reporter(Model model) {
        assert (model != null);

        this.model = model;

        // Supported Formats:
        formatters = new LinkedList<ReportWriter>();
        formatters.add(new CsvRFC4180ReporterWriter());
        formatters.add(new CsvEuropeanReporterWriter());
        formatters.add(new XmlReportWriter());
    }

    public List<ReportWriter> getWriters() {
        return formatters;
    }

    public void export(ExporterConfig config, Project project, int commitDictId, int bugDictId, Settings settings,
            ReportWriter formatter, String outputPath) throws ReporterException {
        assert (project != null);
        assert (project.getId() != null);
        assert (config != null);
        assert (settings != null);
        assert (formatter != null);
        assert (model != null);
        assert (config.getQuery() != null);

        Map<String, Object> vars = new HashMap<String, Object>();
        vars.put("project", project.getId());
        vars.put("commitDict", commitDictId);
        vars.put("bugDict", bugDictId);

        try {
            if (config.getWordStats()) {
                exportWordStats(config, project, commitDictId, bugDictId, settings, formatter, outputPath, vars);
            } else {
                exportRows(config, project, commitDictId, bugDictId, settings, formatter, outputPath, vars);
            }
        } catch (ReporterException e) {
            throw e;
        } catch (Exception e) {
            throw new ReporterException("Exporter: " + e.getMessage(), e);
        } finally {
            formatter.cleanup();
        }
    }

    private void exportWordStats(final ExporterConfig config, Project project, int commitDictId, int bugDictId,
            Settings settings, final ReportWriter formatter, String outputPath, Map<String, Object> vars)
            throws SQLException, Exception {
        formatter.init(project, settings, outputPath);
        model.rawForeach(config.getQuery(), vars, new ResultCallback() {

            @Override
            public void processResult(ResultSet res) throws SemanticException, SQLException, Exception {
                ResultSetMetaData meta = res.getMetaData();
                String[] titles = new String[meta.getColumnCount()];
                if (titles.length != 2) {
                    throw new SemanticException(
                            "semantic error: invalid column count, expected: (<string>, <string>)",
                            config.getStart(), config.getEnd());
                }

                if (meta.getColumnType(1) != Types.VARCHAR || meta.getColumnType(2) != Types.VARCHAR) {
                    throw new SemanticException(
                            "semantic error: invalid column type, expected: (<string>, <string>), got " + "(<"
                                    + meta.getColumnTypeName(1) + ">, <" + meta.getColumnTypeName(2) + ">)",
                            config.getStart(), config.getEnd());
                }

                Map<String, Map<String, Integer>> data = new HashMap<String, Map<String, Integer>>();
                Lemmatizer lemmatiser = new Lemmatizer();

                Set<String> categoryNames = new HashSet<String>();

                while (res.next()) {
                    String category = res.getString(1);
                    categoryNames.add(category);

                    List<String> lemma = lemmatiser.lemmatize(res.getString(2));

                    for (String word : lemma) {
                        Map<String, Integer> counter = data.get(word);
                        if (counter == null) {
                            counter = new HashMap<String, Integer>();
                            data.put(word, counter);
                        }

                        Integer wordCount = counter.get(category);
                        if (wordCount == null) {
                            wordCount = 0;
                        }

                        counter.put(category, wordCount + 1);
                    }
                }

                String[] header = new String[categoryNames.size() + 1];
                header[0] = "word";

                int i = 1;
                for (String catName : categoryNames) {
                    header[i] = catName;
                    i++;
                }

                formatter.writeHeader(header);

                for (Entry<String, Map<String, Integer>> entry : data.entrySet()) {
                    Map<String, Integer> scores = entry.getValue();
                    String[] row = new String[header.length];

                    row[0] = entry.getKey();
                    i = 1;
                    for (String cat : categoryNames) {
                        Integer score = scores.get(cat);
                        if (score == null) {
                            score = 0;
                        }
                        row[i] = score.toString();
                        i++;

                    }

                    formatter.writeSet(row);
                }

                formatter.writeFooter(header);
            }
        });
    }

    private void exportRows(ExporterConfig config, Project project, int commitDictId, int bugDictId,
            Settings settings, final ReportWriter formatter, String outputPath, Map<String, Object> vars)
            throws SQLException, Exception {
        formatter.init(project, settings, outputPath);
        model.rawForeach(config.getQuery(), vars, new ResultCallback() {

            @Override
            public void processResult(ResultSet res) throws SemanticException, SQLException, Exception {
                ResultSetMetaData meta = res.getMetaData();
                String[] titles = new String[meta.getColumnCount()];
                for (int i = 0; i < titles.length; i++) {
                    titles[i] = meta.getColumnLabel(i + 1);
                }

                formatter.writeHeader(titles);

                while (res.next()) {
                    String[] data = new String[titles.length];
                    for (int i = 0; i < data.length; i++) {
                        data[i] = res.getString(i + 1);
                    }

                    formatter.writeSet(data);
                }

                formatter.writeFooter(titles);
            }
        });
    }

    public static void main(String[] args) {
        Options options = new Options();
        options.addOption("h", "help", false, "Show this options");
        options.addOption("d", "db", true, "The database to process (required)");
        options.addOption("p", "project", true, "The project ID to process");
        options.addOption("P", "list-projects", false, "List all registered projects");
        options.addOption("C", "config", true, "A configuration file including reports");
        options.addOption("F", "list-formats", false, "List all supported output formats");
        options.addOption("f", "format", true, "Output format");
        options.addOption("R", "list-reports", false, "List all report types");
        options.addOption("r", "report", true, "Report type");
        options.addOption("o", "output", true, "Output path");
        options.addOption("c", "commit-dictionary", true, "The commit dictionary ID to use");
        options.addOption("b", "bug-dictionary", true, "The bug dictionary ID to use");
        options.addOption("D", "list-dictionaries", false, "List all dictionaries");
        options.addOption("v", "verbose", false, "Show details");

        at.ac.tuwien.inso.subcat.utility.Reporter errReporter = new at.ac.tuwien.inso.subcat.utility.Reporter(
                false);
        Settings settings = new Settings();
        boolean verbose = false;
        ModelPool pool = null;
        Model model = null;

        CommandLineParser parser = new PosixParser();

        try {
            CommandLine cmd = parser.parse(options, args);
            verbose = cmd.hasOption("verbose");

            if (cmd.hasOption("help")) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("postprocessor", options);
                return;
            }

            if (cmd.hasOption("db") == false) {
                errReporter.error("reporter", "Option --db is required");
                errReporter.printSummary();
                return;
            }

            if (cmd.hasOption("config") == false) {
                errReporter.error("reporter", "Option --config is required");
                errReporter.printSummary();
                return;
            }

            Configuration config = new Configuration();
            Parser configParser = new Parser();
            try {
                configParser.parse(config, new File(cmd.getOptionValue("config")));
            } catch (IOException e) {
                errReporter.error("reporter", "Could not read configuration file: " + e.getMessage());
                errReporter.printSummary();
                return;
            } catch (ParserException e) {
                errReporter.error("reporter", "Could not parse configuration file: " + e.getMessage());
                errReporter.printSummary();
                return;
            }

            if (cmd.hasOption("list-reports")) {
                int i = 1;

                for (ExporterConfig exconf : config.getExporterConfigs()) {
                    System.out.println("  (" + i + ") " + exconf.getName());
                    i++;
                }

                return;
            }

            File dbf = new File(cmd.getOptionValue("db"));
            if (dbf.exists() == false || dbf.isFile() == false) {
                errReporter.error("reporter", "Invalid database file path");
                errReporter.printSummary();
                return;
            }

            pool = new ModelPool(cmd.getOptionValue("db"), 2);
            pool.setPrintTemplates(verbose);
            model = pool.getModel();

            if (cmd.hasOption("list-formats")) {
                Reporter exporter = new Reporter(model);
                int i = 1;

                for (ReportWriter formatter : exporter.getWriters()) {
                    System.out.println("  (" + i + ") " + formatter.getLabel());
                    i++;
                }

                return;
            }

            if (cmd.hasOption("list-projects")) {
                for (Project proj : model.getProjects()) {
                    System.out.println("  " + proj.getId() + ": " + proj.getDate());
                }

                return;
            }

            Integer projId = null;
            if (cmd.hasOption("project") == false) {
                errReporter.error("reporter", "Option --project is required");
                errReporter.printSummary();
                return;
            } else {
                try {
                    projId = Integer.parseInt(cmd.getOptionValue("project"));
                } catch (NumberFormatException e) {
                    errReporter.error("reporter", "Invalid project ID");
                    errReporter.printSummary();
                    return;
                }
            }

            if (cmd.hasOption("output") == false) {
                errReporter.error("reporter", "Option --output is required");
                errReporter.printSummary();
                return;
            }

            String outputPath = cmd.getOptionValue("output");
            model = pool.getModel();
            Project project = model.getProject(projId);

            if (project == null) {
                errReporter.error("reporter", "Invalid project ID");
                errReporter.printSummary();
                return;
            }

            if (cmd.hasOption("list-dictionaries")) {
                List<at.ac.tuwien.inso.subcat.model.Dictionary> dictionaries = model.getDictionaries(project);
                for (at.ac.tuwien.inso.subcat.model.Dictionary dict : dictionaries) {
                    System.out
                            .println("  (" + dict.getId() + ") " + " " + dict.getContext() + " " + dict.getName());
                }
                return;
            }

            int bugDictId = -1;
            if (cmd.hasOption("bug-dictionary")) {
                try {
                    bugDictId = Integer.parseInt(cmd.getOptionValue("bug-dictionary"));
                    List<at.ac.tuwien.inso.subcat.model.Dictionary> dictionaries = model.getDictionaries(project);
                    boolean valid = false;

                    for (at.ac.tuwien.inso.subcat.model.Dictionary dict : dictionaries) {
                        if (dict.getId() == bugDictId) {
                            valid = true;
                            break;
                        }
                    }

                    if (valid == false) {
                        errReporter.error("reporter", "Invalid bug dictionary ID");
                        errReporter.printSummary();
                        return;
                    }
                } catch (NumberFormatException e) {
                    errReporter.error("reporter", "Invalid bug dictionary ID");
                    errReporter.printSummary();
                    return;
                }
            }

            int commitDictId = -1;
            if (cmd.hasOption("commit-dictionary")) {
                try {
                    commitDictId = Integer.parseInt(cmd.getOptionValue("commit-dictionary"));
                    List<at.ac.tuwien.inso.subcat.model.Dictionary> dictionaries = model.getDictionaries(project);
                    boolean valid = false;

                    for (at.ac.tuwien.inso.subcat.model.Dictionary dict : dictionaries) {
                        if (dict.getId() == commitDictId) {
                            valid = true;
                            break;
                        }
                    }

                    if (valid == false) {
                        errReporter.error("reporter", "Invalid commit dictionary ID");
                        errReporter.printSummary();
                        return;
                    }
                } catch (NumberFormatException e) {
                    errReporter.error("reporter", "Invalid commit dictionary ID");
                    errReporter.printSummary();
                    return;
                }
            }

            if (cmd.hasOption("format") == false) {
                errReporter.error("reporter", "Option --format is required");
                errReporter.printSummary();
                return;
            }

            Reporter exporter = new Reporter(model);
            ReportWriter writer = null;
            try {
                int id = Integer.parseInt(cmd.getOptionValue("format"));
                if (id < 1 || id > exporter.getWriters().size()) {
                    errReporter.error("reporter", "Invalid output format");
                    errReporter.printSummary();
                    return;
                }

                writer = exporter.getWriters().get(id - 1);
            } catch (NumberFormatException e) {
                errReporter.error("reporter", "Invalid output format");
                errReporter.printSummary();
                return;
            }

            ExporterConfig exporterConfig = null;
            if (cmd.hasOption("report") == false) {
                errReporter.error("reporter", "Option --report is required");
                errReporter.printSummary();
                return;
            } else {
                try {
                    int id = Integer.parseInt(cmd.getOptionValue("report"));
                    if (id < 1 || id > config.getExporterConfigs().size()) {
                        errReporter.error("reporter", "Invalid reporter ID");
                        errReporter.printSummary();
                        return;
                    }

                    exporterConfig = config.getExporterConfigs().get(id - 1);
                } catch (NumberFormatException e) {
                    errReporter.error("reporter", "Invalid reporter ID");
                    errReporter.printSummary();
                    return;
                }
            }

            exporter.export(exporterConfig, project, commitDictId, bugDictId, settings, writer, outputPath);
        } catch (ParseException e) {
            errReporter.error("reporter", "Parsing failed: " + e.getMessage());
            if (verbose == true) {
                e.printStackTrace();
            }
        } catch (ClassNotFoundException e) {
            errReporter.error("reporter", "Failed to create a database connection: " + e.getMessage());
            if (verbose == true) {
                e.printStackTrace();
            }
        } catch (SQLException e) {
            errReporter.error("reporter", "Failed to create a database connection: " + e.getMessage());
            if (verbose == true) {
                e.printStackTrace();
            }
        } catch (ReporterException e) {
            errReporter.error("reporter", "Reporter Error: " + e.getMessage());
            if (verbose == true) {
                e.printStackTrace();
            }
        } finally {
            if (model != null) {
                model.close();
            }
            if (pool != null) {
                pool.close();
            }
        }

        errReporter.printSummary();
    }
}