gate.util.reporting.DocTimeReporter.java Source code

Java tutorial

Introduction

Here is the source code for gate.util.reporting.DocTimeReporter.java

Source

/*
 *  DocTimeReporter.java
 *
 *  Copyright (c) 2008-2009, Intelius, Inc.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Chirag Viradiya & Andrew Borthwick, 30/Sep/2009
 *
 *  $Id$
 */
package gate.util.reporting;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;

import gate.util.reporting.exceptions.BenchmarkReportExecutionException;
import gate.util.reporting.exceptions.BenchmarkReportFileAccessException;
import gate.util.reporting.exceptions.BenchmarkReportInputFileFormatException;
import gnu.getopt.Getopt;

/**
 * A reporter class that generates a report on the time taken by each document
 * within a given corpus.
 */
public class DocTimeReporter implements BenchmarkReportable {

    /** Handle to the input benchmark file; defaults to "benchmark.txt". */
    private File benchmarkFile = new File("benchmark.txt");
    /** Report media; one of MEDIA_TEXT or MEDIA_HTML (the default). */
    private String printMedia = MEDIA_HTML;
    /** Number of documents to list in the report, or ALL_DOCS for no limit. */
    private int maxDocumentInReport = 10;
    /** Search string used to select PRs; MATCH_ALL_PR_REGEX selects every PR. */
    private String PRMatchingRegex = MATCH_ALL_PR_REGEX;
    /** A marker indicating the start of the current logical run. */
    private String logicalStart = null;
    /** Path where the report file is saved. */
    private File reportFile;

    /**
     * A HashSet containing the names of the documents for which at least one
     * entry was stored.
     */
    private HashSet<String> allDocs = new HashSet<String>();
    /**
     * A HashSet containing the PR names that matched the search string. Used
     * for display in the report header.
     */
    private HashSet<String> matchingPRs = new HashSet<String>();
    /**
     * Total time (in milliseconds) taken by the given pipeline for the current
     * logical run. Reports print it divided by 1000, as seconds.
     */
    private float globalTotal = 0;
    /**
     * A LinkedHashMap from document name to accumulated time, filled while
     * totalling the stored entries.
     */
    private LinkedHashMap<String, String> docContainer = new LinkedHashMap<String, String>();
    /**
     * Folder where the benchmark.txt files are created for specific pipeline log
     * entries.
     */
    private File temporaryDirectory;
    /** Name of the given pipeline. */
    private String pipelineName = "";
    /** Exit status for a normal exit. */
    private static final int STATUS_NORMAL = 0;
    /** Exit status for an error exit. */
    private static final int STATUS_ERROR = 1;
    /** Chunk size in which the file will be read. */
    private static final int FILE_CHUNK_SIZE = 2000;
    /** An OS independent line separator. */
    private static final String NL = System.getProperty("line.separator");
    /**
     * Count of the valid log entries found in the input file provided.
     */
    public int validEntries = 0;

    /**
     * This string constant, when set as print media, indicates that the report
     * is printed in TEXT format.
     */
    public static final String MEDIA_TEXT = "text";
    /**
     * This string constant, when set as print media, indicates that the report
     * is printed in HTML format.
     */
    public static final String MEDIA_HTML = "html";

    /**
     * This integer constant, when set as the number of documents, indicates
     * that the report lists all the documents matching a given PR.
     */
    public static final int ALL_DOCS = -1;

    /**
     * The default value for the search string; it matches all PRs of the given
     * run.
     */
    public static final String MATCH_ALL_PR_REGEX = "all_prs";

    /**
     * Creates a reporter with the default settings and sets up its temporary
     * working directory.
     */
    public DocTimeReporter() {
        this.initTmpDir();
    }

    /**
     * Creates a reporter for command line use: sets up the temporary working
     * directory and then applies the given command line options.
     *
     * @param args array containing command line arguments.
     */
    DocTimeReporter(String[] args) {
        // The no-argument constructor performs the temporary directory set-up.
        this();
        this.parseArguments(args);
    }

    private void initTmpDir() {
        try {
            // Reserve a unique name as a temp file, then swap the file for a
            // directory of the same name.
            temporaryDirectory = File.createTempFile("benchmark-reports", "", null);
            boolean placeholderRemoved = temporaryDirectory.delete();
            boolean directoryReady = placeholderRemoved && temporaryDirectory.mkdir();
            if (!directoryReady) {
                String where = temporaryDirectory.getCanonicalPath();
                throw new IOException("Unable to create temporary directory.\n" + where);
            }
        } catch (IOException e) {
            // Failure is only reported; the reporter is still constructed.
            e.printStackTrace();
        }
    }

    /**
     * Totals the stored times per document and orders the documents by that
     * total, most expensive first.
     *
     * @param reportContainer
     *          An Object of type LinkedHashMap<String, Object> containing the
     *          stored entries (with time in milliseconds) in hierarchical
     *          structure.
     * @return An Object holding a LinkedHashMap from document name to total
     *         time, sorted by value in descending order.
     */
    @SuppressWarnings("unchecked")
    @Override
    public Object calculate(Object reportContainer) {
        LinkedHashMap<String, Object> hierarchy = (LinkedHashMap<String, Object>) reportContainer;
        LinkedHashMap<String, String> perDocumentTotals = doTotal(hierarchy);
        return sortHashMapByValues(perDocumentTotals);
    }

    /**
     * Builds a copy of the given map ordered by value, largest first (as ranked
     * by ValueComparator). Entries with equal values keep all their keys, taken
     * in ascending key order. Every entry moved to the result is removed from
     * the map passed in.
     *
     * @param passedMap
     *          An Object of type LinkedHashMap to be sorted by its values.
     * @return An Object containing the sorted LinkedHashMap.
     */
    private LinkedHashMap<?, ?> sortHashMapByValues(LinkedHashMap<String, String> passedMap) {
        List<String> remainingKeys = new ArrayList<String>(passedMap.keySet());
        List<String> descendingValues = new ArrayList<String>(passedMap.values());

        // Ascending sort followed by a reversal yields descending values.
        Collections.sort(descendingValues, new ValueComparator());
        Collections.reverse(descendingValues);
        Collections.sort(remainingKeys);

        LinkedHashMap<String, String> sortedMap = new LinkedHashMap<String, String>();
        for (String value : descendingValues) {
            // Find the first not-yet-used key that carries this value.
            String owner = null;
            for (String candidate : remainingKeys) {
                String candidateValue = passedMap.get(candidate).toString();
                if (candidateValue.equals(value.toString())) {
                    owner = candidate;
                    break;
                }
            }
            if (owner != null) {
                passedMap.remove(owner);
                remainingKeys.remove(owner);
                sortedMap.put(owner, value);
            }
        }
        return sortedMap;
    }

    /**
     * Walks the hierarchical container and accumulates, per leaf key, the sum
     * of the times found for that key into the docContainer field.
     *
     * @param reportContainer
     *          An Object of type LinkedHashMap<String, Object> containing the
     *          stored entries (with time in milliseconds) in hierarchical
     *          structure.
     * @return The docContainer map holding the totalled values.
     */
    @SuppressWarnings("unchecked")
    private LinkedHashMap<String, String> doTotal(LinkedHashMap<String, Object> reportContainer) {
        for (String name : reportContainer.keySet()) {
            Object entry = reportContainer.get(name);
            if (entry instanceof LinkedHashMap) {
                // Nested level: descend and keep accumulating.
                docContainer = doTotal((LinkedHashMap<String, Object>) entry);
                continue;
            }
            String elapsed = (String) entry;
            String runningTotal = docContainer.get(name);
            if (runningTotal == null) {
                // First time this key is seen.
                docContainer.put(name, elapsed);
            } else {
                int sum = Integer.parseInt(runningTotal) + Integer.parseInt(elapsed);
                docContainer.put(name, Integer.toString(sum));
            }
        }
        return docContainer;
    }

    /**
     * Writes the report in the format selected by the print media setting.
     * Nothing is written when the setting is neither text nor HTML.
     *
     * @param reportSource
     *          An Object of type LinkedHashMap<String, Object> containing the
     *          document names with their times in milliseconds.
     * @param outputFile
     *          Path where to save the report.
     */
    @SuppressWarnings("unchecked")
    @Override
    public void printReport(Object reportSource, File outputFile) {
        if (printMedia.equalsIgnoreCase(MEDIA_TEXT)) {
            printToText(reportSource, outputFile);
            return;
        }
        if (printMedia.equalsIgnoreCase(MEDIA_HTML)) {
            LinkedHashMap<String, Object> htmlSource = (LinkedHashMap<String, Object>) reportSource;
            printToHTML(htmlSource, outputFile);
        }
    }

    /**
     * Appends the benchmark report in plain text format to the output file.
     *
     * @param reportContainer
     *          An Object of type LinkedHashMap<String, Object> mapping document
     *          names to their time in milliseconds.
     * @param outputFile
     *          An object of type File representing the output report file.
     */
    private void printToText(Object reportContainer, File outputFile) {
        final String heavyRule = "=============================================================" + NL;
        final String lightRule = "-------------------------------------------------------------" + NL;
        @SuppressWarnings("unchecked")
        LinkedHashMap<String, Object> timings = (LinkedHashMap<String, Object>) reportContainer;
        final boolean hasDocuments = allDocs.size() > 0;
        // Number shown in the "Top N" heading: never more than the documents seen.
        final String docs = (maxDocumentInReport == ALL_DOCS) ? "All"
                : Integer.toString(Math.min(allDocs.size(), maxDocumentInReport));

        StringBuilder report = new StringBuilder();

        // Heading
        report.append(heavyRule);
        if (PRMatchingRegex.equals(MATCH_ALL_PR_REGEX)) {
            report.append("Top " + docs + " expensive documents matching All PRs in " + pipelineName + NL);
        } else if (matchingPRs.size() > 0) {
            report.append("Top " + docs + " expensive documents matching following PRs in " + pipelineName + NL);
            for (String pr : matchingPRs) {
                report.append("\t" + pr + NL);
            }
        } else {
            report.append("No PRs matched to search string \"" + getPRMatchingRegex() + "\"" + " in " + pipelineName);
            report.append(NL);
            report.append(heavyRule);
        }

        // Column titles
        if (hasDocuments) {
            report.append(heavyRule);
            report.append("Document Name" + "\t" + "Time (in seconds)" + "\t" + "%" + NL);
            report.append(lightRule);
        }

        // One row per document, limited to the configured maximum
        int position = 0;
        for (String docName : timings.keySet()) {
            if (!docName.equals("total")) {
                int millis = Integer.parseInt((String) timings.get(docName));
                boolean withinLimit = maxDocumentInReport == ALL_DOCS || position < maxDocumentInReport;
                if (withinLimit) {
                    report.append(docName + "\t" + millis / 1000.0 + "\t"
                            + Math.round(((millis / globalTotal) * 100) * 10) / 10.0 + NL);
                }
            }
            position++;
        }

        // Totals row
        if (hasDocuments) {
            report.append(lightRule);
            report.append("Pipeline Total" + "\t" + globalTotal / 1000.0 + "\t" + 100 + NL + NL + NL);
        }

        BufferedWriter out = null;
        try {
            // Opened in append mode so earlier content of the file is kept.
            out = new BufferedWriter(new FileWriter(outputFile, true));
            out.write(report.toString());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads the benchmark file line by line and stores, per PR and document,
     * the time taken, in an in-memory structure for report generation. Also
     * sets the pipeline name from the file name and accumulates the pipeline
     * total from the entries logged directly against the start token.
     *
     * Lines that appear before the first START marker, and entries seen before
     * any document name is known, cannot be attributed to a run or a document
     * and are skipped.
     *
     * @param inputFile
     *          A handle to the input benchmark file.
     *
     * @return An Object of type LinkedHashMap<String, Object> mapping each PR
     *         to a map of document name to time in milliseconds. Null if there
     *         was an I/O error.
     *
     * @throws BenchmarkReportInputFileFormatException
     *           if a START marker line does not carry a start token.
     */
    @Override
    public Object store(File inputFile) throws BenchmarkReportInputFileFormatException {
        // The pipeline name is the file name without its "_benchmark.txt" suffix.
        pipelineName = inputFile.getName().replace("_benchmark.txt", "");
        LinkedHashMap<String, Object> globalStore = new LinkedHashMap<String, Object>();
        // These patterns do not depend on the line being read: compile them once.
        Pattern startMarker = Pattern.compile(".*START.*");
        Pattern entryPattern = Pattern.compile("(\\d+) (\\d+) (.*) (.*) \\{(.*)\\}");
        Pattern docNamePattern = Pattern.compile(".*documentName=(.*?)[,|}].*");
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(inputFile));
            String str;
            String docName = null;
            String startToken = null;
            // Matches entries logged directly against the start token; rebuilt
            // only when a START marker changes the token.
            Pattern runTotalPattern = null;
            while ((str = in.readLine()) != null) {
                if (startMarker.matcher(str).matches()) {
                    String[] splittedStartEntry = str.split("\\s");
                    if (splittedStartEntry.length <= 2) {
                        throw new BenchmarkReportInputFileFormatException(getBenchmarkFile() + " is invalid.");
                    }
                    startToken = splittedStartEntry[2];
                    runTotalPattern = Pattern
                            .compile("(\\d+) (\\d+) " + Pattern.quote(startToken) + " (.*) \\{(.*)\\}.*");
                }
                if (startToken == null) {
                    // No START marker seen yet: the line belongs to no run.
                    continue;
                }

                Matcher docNameMatcher = docNamePattern.matcher(str);
                if (docNameMatcher.matches()) {
                    docName = docNameMatcher.group(1);
                }
                Matcher runTotalMatcher = runTotalPattern.matcher(str);
                if (runTotalMatcher.matches()) {
                    globalTotal = globalTotal + Integer.parseInt(runTotalMatcher.group(2));
                }

                Matcher matcher = entryPattern.matcher(str);
                if (!matcher.matches() || docName == null) {
                    // Not a timing entry, or no document to attribute it to.
                    continue;
                }
                // Strip the start token and the "doc_<name>" segment so that the
                // chain starts with the PR id.
                String benchmarkIDs = matcher.group(3).replaceFirst(Pattern.quote(startToken) + ".", "")
                        .replaceFirst("doc_" + Pattern.quote(docName) + ".", "");
                String[] splittedBenchmarkIDs = benchmarkIDs.split("\\.");
                if (splittedBenchmarkIDs.length == 0) {
                    // Nothing left to name a PR; do not reuse the previous line's PR.
                    continue;
                }
                String matchedPR = splittedBenchmarkIDs[0];
                if (PRMatchingRegex.equals(MATCH_ALL_PR_REGEX)) {
                    // Entries for the start token itself only count towards the total.
                    if (!matchedPR.equalsIgnoreCase(startToken)) {
                        organizeEntries(globalStore, matchedPR, matcher.group(2), docName);
                    }
                } else if (isPRMatched(benchmarkIDs, PRMatchingRegex)) {
                    matchingPRs.add(matchedPR);
                    organizeEntries(globalStore, matchedPR, matcher.group(2), docName);
                }
            }

        } catch (IOException e) {
            e.printStackTrace();
            globalStore = null;

        } finally {
            try {
                if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
                globalStore = null;
            }
        }
        return globalStore;
    }

    /**
     * Records the time of one log entry under its PR and document, and notes
     * the document name in the set of all documents seen.
     *
     * @param store
     *          A global LinkedHashMap mapping each PR to a map of document name
     *          to time in milliseconds.
     * @param matchedPR
     *          A PR matching the given search string.
     * @param bTime
     *          Time taken by the specific processing element.
     * @param docName
     *          Name of the document being processed.
     */
    @SuppressWarnings("unchecked")
    private void organizeEntries(LinkedHashMap<String, Object> store, String matchedPR, String bTime,
            String docName) {
        allDocs.add(docName);
        if (!store.containsKey(matchedPR)) {
            // First entry for this PR: start its per-document map.
            store.put(matchedPR, new LinkedHashMap<String, Object>());
        }
        LinkedHashMap<String, Object> perDocument = (LinkedHashMap<String, Object>) store.get(matchedPR);
        perDocument.put(docName, bTime);
    }

    /**
     * Prints the document level statistics report in HTML format. Document
     * names, PR names, the pipeline name and the search string are HTML-escaped
     * before being placed in the page. Any "total" entry is removed from the
     * given map.
     *
     * @param reportSource
     *          An Object of type LinkedHashMap<String, Object> containing the
     *          document names (with time in milliseconds).
     * @param outputFile
     *          An object of type File representing the output report file to
     *          which the HTML report is to be written.
     */
    private void printToHTML(LinkedHashMap<String, Object> reportSource, File outputFile) {
        String htmlReport = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"" + NL
                + "\"http://www.w3.org/TR/html4/loose.dtd\">" + NL
                + "<html><head><title>Benchmarking Report</title>" + NL + "<meta http-equiv=\"Content-Type\""
                + " content=\"text/html; charset=utf-8\">" + NL + "<style type=\"text/css\">" + NL
                + "div { font-size:12px; margin-top: 4; }" + NL + "</style>" + NL + "</head>" + NL
                + "<body style=\"font-family:Verdana; color:navy;\">" + NL;
        String hTrace = "<div style=\"right: 0pt; border-top:1px solid #C9D7F1;" + " font-size:1px;\" ></div>" + NL;
        String titleDivOpen = "<div style=\"font-size:15px;font-family:Verdana; color:navy;\">";
        // Number shown in the "Top N" heading: never more than the documents seen.
        String docs = (maxDocumentInReport == ALL_DOCS) ? "All"
                : Integer.toString(Math.min(allDocs.size(), maxDocumentInReport));
        String safePipelineName = escapeHtmlText(pipelineName);

        StringBuilder reportTitle = new StringBuilder(hTrace);
        if (PRMatchingRegex.equals(MATCH_ALL_PR_REGEX)) {
            reportTitle.append(titleDivOpen).append("Top ").append(docs)
                    .append(" expensive documents matching All PRs in <b>").append(safePipelineName)
                    .append("</b></div>").append(NL);
        } else if (matchingPRs.size() > 0) {
            reportTitle.append(titleDivOpen).append("Top ").append(docs)
                    .append(" expensive documents matching following PRs in <b>").append(safePipelineName)
                    .append("</b> <ul>").append(NL);
            for (String pr : matchingPRs) {
                reportTitle.append("<li>").append(escapeHtmlText(pr)).append("</li>");
            }
            reportTitle.append("</ul></div>");
        } else {
            reportTitle.append(titleDivOpen).append("No PRs matched to search string \"")
                    .append(escapeHtmlText(getPRMatchingRegex())).append(" \" in ").append(safePipelineName)
                    .append("</div>");
        }
        reportTitle.append(hTrace);

        if (allDocs.size() > 0) {
            String htmlReportTitle = reportTitle + "<table><tr bgcolor=\"#eeeeff\">"
                    + "<td><b>Document Name</b></td>" + "<td><b>Time in seconds</b></td>"
                    + "<td><b>% Time taken</b></td>" + "</tr><tr>" + NL;
            // The table has a single data row; each cell stacks one <div> per document.
            StringBuilder nameColumn = new StringBuilder("<td rowspan = '112' width = '550'>");
            StringBuilder timeColumn = new StringBuilder("<td width = '100'>");
            StringBuilder percentColumn = new StringBuilder("<td width = '100'>");
            reportSource.remove("total");
            int count = 0;
            for (String key : reportSource.keySet()) {
                int value = Integer.parseInt((String) reportSource.get(key));
                if (maxDocumentInReport == ALL_DOCS || count < maxDocumentInReport) {
                    nameColumn.append("<div>").append(escapeHtmlText(key)).append("</div>");
                    timeColumn.append("<div>").append(value / 1000.0).append("</div>");
                    percentColumn.append("<div>").append(Math.round(((value / globalTotal) * 100) * 10) / 10.0)
                            .append("</div>").append(NL);
                }
                count++;
            }
            nameColumn.append("<div bgcolor=\"#eeeeff\" style = \"font-size:15px;margin-left:400px;\">"
                    + "<b>Total</b></div></td>" + NL);
            timeColumn.append("<div bgcolor=\"#eeeeff\" style = \"font-size:15px;\"><b>" + globalTotal / 1000.0
                    + "</b></div></td>" + NL);
            percentColumn.append("<div bgcolor=\"#eeeeff\" style = \"font-size:15px;\">"
                    + "<b>100</b></div></td>" + NL);

            String table = htmlReportTitle + nameColumn + timeColumn + percentColumn;
            if (!outputFile.exists()) {
                // New file: keep the page header, leave the page open.
                htmlReport += table + "</tr></table>";
            } else {
                // Existing file: emit only the table fragment and close the page.
                htmlReport = "<br/><br/>" + table + "</tr></table></body></html>";
            }
        } else {
            htmlReport += reportTitle + "</body></html>";
        }

        BufferedWriter out = null;
        try {
            // NOTE(review): the writer is not opened in append mode, so when the
            // file already exists its content is replaced by the fragment built
            // above. Confirm with the callers whether appending was intended.
            out = new BufferedWriter(new FileWriter(outputFile));
            out.write(htmlReport);

        } catch (IOException e) {
            e.printStackTrace();

        } finally {
            try {
                if (out != null) {
                    out.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Replaces the characters that are special in HTML text (&amp;, &lt;, &gt;
     * and the double quote) with their character entity references.
     *
     * @param text
     *          the raw text; null is rendered as the string "null".
     * @return the text, safe to place inside HTML element content.
     */
    private static String escapeHtmlText(String text) {
        String raw = String.valueOf(text);
        StringBuilder escaped = new StringBuilder(raw.length() + 16);
        for (int i = 0; i < raw.length(); i++) {
            char ch = raw.charAt(i);
            switch (ch) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            default:
                escaped.append(ch);
                break;
            }
        }
        return escaped.toString();
    }

    /**
     * Ignores the inconsistent log entries from the benchmark file. Entries from
     * modules like pronominal coreferencer which have not been converted to new
     * benchmarking conventions are ignored.
     *
     * An entry is valid when its id chain is exactly one of the start tokens,
     * or when it has the form "startToken.doc_....pr_...". Every valid entry
     * increments the validEntries counter. Start tokens are matched literally,
     * so regular expression metacharacters in a token have no special meaning.
     *
     * @param benchmarkIDChain
     *          the chain of benchmark ids. This is the third token in the
     *          benchmark file.
     * @param startTokens
     *          an array of first tokens in the benchmark id chain.
     *
     * @return true if valid log entry; false otherwise.
     */
    private boolean validateLogEntry(String benchmarkIDChain, ArrayList<String> startTokens) {
        // Does not change between tokens, so work it out once.
        boolean singleSegment = benchmarkIDChain.split("\\.").length == 1;
        StringBuilder alternatives = new StringBuilder("(");
        boolean first = true;
        for (String token : startTokens) {
            if (singleSegment && benchmarkIDChain.equals(token)) {
                validEntries += 1;
                return true;
            }
            if (!first) {
                alternatives.append('|');
            }
            // Quote the token so that it is matched as literal text.
            alternatives.append(Pattern.quote(token));
            first = false;
        }
        alternatives.append(")\\.doc_.*?\\.pr_.*");
        if (benchmarkIDChain.matches(alternatives.toString())) {
            validEntries += 1;
            return true;
        }
        return false;
    }

    /**
     * Parses the report command line arguments and applies them through the
     * corresponding setters. Prints the usage and exits the JVM for -h, for
     * options getopt reports as '?', and for a non-numeric -d value.
     *
     * @param args array containing the command line arguments.
     */
    @Override
    public void parseArguments(String[] args) {
        Getopt options = new Getopt("gate.util.reporting.DocTimeReporter", args, "i:m:d:p:o:l:h");
        // Kept as text until all options are read; converted to a number below.
        String requestedDocCount = null;
        for (int option = options.getopt(); option != -1; option = options.getopt()) {
            switch (option) {
            case 'i': { // -i inputFile
                String inputPath = options.getOptarg();
                if (inputPath != null) {
                    setBenchmarkFile(new File(inputPath));
                }
                break;
            }
            case 'm': { // -m printMedia
                String media = options.getOptarg();
                if (media != null) {
                    setPrintMedia(media);
                }
                break;
            }
            case 'd': { // -d noOfDocs
                requestedDocCount = options.getOptarg();
                if (requestedDocCount == null) {
                    setMaxDocumentInReport(maxDocumentInReport);
                }
                break;
            }
            case 'p': { // -p prName
                String prName = options.getOptarg();
                setPRMatchingRegex(prName != null ? prName : PRMatchingRegex);
                break;
            }
            case 'o': { // -o Report File
                String outputPath = options.getOptarg();
                if (outputPath != null) {
                    setReportFile(new File(outputPath));
                }
                break;
            }
            case 'l': { // -l logical start
                String marker = options.getOptarg();
                if (marker != null) {
                    setLogicalStart(marker);
                }
                break;
            }
            case 'h': // -h usage information
            case '?':
                usage();
                System.exit(STATUS_NORMAL);
                break;
            default:
                usage();
                System.exit(STATUS_ERROR);
                break;
            }
        }
        if (requestedDocCount == null) {
            return;
        }
        try {
            setMaxDocumentInReport(Integer.parseInt(requestedDocCount));
        } catch (NumberFormatException e) {
            e.printStackTrace();
            usage();
            System.exit(STATUS_ERROR);
        }
    }

    /**
     * Returns the media on which the report will be generated, e.g. text or
     * HTML.
     *
     * @return the name of the currently selected report media.
     */
    public String getPrintMedia() {
        return this.printMedia;
    }

    /**
     * Sets the media on which report will be generated. Surrounding whitespace
     * and letter case are ignored, and the stored value is always one of the
     * two constants.
     *
     * @param printMedia Type of media on which the report will be generated.
     * Must be MEDIA_TEXT or  MEDIA_HTML.
     * The default is MEDIA_HTML.
     * @throws IllegalArgumentException if the value is null or names neither
     * media.
     */
    public void setPrintMedia(String printMedia) {
        // Trim before validating so that padded values are accepted.
        String requested = (printMedia == null) ? null : printMedia.trim();
        if (MEDIA_HTML.equalsIgnoreCase(requested)) {
            this.printMedia = MEDIA_HTML;
        } else if (MEDIA_TEXT.equalsIgnoreCase(requested)) {
            this.printMedia = MEDIA_TEXT;
        } else {
            throw new IllegalArgumentException("Illegal argument: " + printMedia);
        }
    }

    /**
     * Tells whether the user's search string matches the PR named by the first
     * segment of the given benchmark ids. Only segments starting with "pr_"
     * can match. The comparison ignores case and is made against the PR name
     * with its "pr_" prefix removed and underscores turned into spaces.
     *
     * @param benchmarkIDs
     *          A string of benchmarkIDs containing the PR name at the start of
     *          string.
     * @param searchString
     *          The string to be matched for PR name.
     *
     * @return boolean true if search string matches PR name; false otherwise.
     */
    private boolean isPRMatched(String benchmarkIDs, String searchString) {
        String prName = benchmarkIDs.split("\\.")[0];
        // Normalise the search string: drop a leading "pr"/"pr_", turn
        // underscores into spaces and collapse runs of whitespace.
        String needle = searchString.trim()
                .replaceAll("^(pr|pr_)", "")
                .replaceAll("_", " ")
                .replaceAll("\\s+", " ")
                .trim();
        if (!prName.startsWith("pr_")) {
            return false;
        }
        String readableName = prName.replaceAll("^pr_", "").replaceAll("_", " ");
        return readableName.matches("(?i).*" + needle + ".*");
    }

    /**
     * Deletes the given path when it is an existing regular file; anything
     * else is left alone.
     *
     * @param fileToBeDeleted
     *          A handle of the file to be deleted.
     * @throws BenchmarkReportFileAccessException
     *           if a given file could not be deleted.
     */
    private void deleteFile(File fileToBeDeleted) throws BenchmarkReportFileAccessException {
        boolean deletionFailed = fileToBeDeleted.isFile() && !fileToBeDeleted.delete();
        if (deletionFailed) {
            String path = fileToBeDeleted.getAbsolutePath();
            throw new BenchmarkReportFileAccessException("Could not delete " + path);
        }
    }

    /**
     * Separates the pipeline specific benchmark entries of the input benchmark
     * file into one temporary benchmark file per pipeline, named
     * <code>&lt;pipeline&gt;_benchmark.txt</code>, inside the temporary
     * directory. Before splitting, every file already present in the temporary
     * directory is deleted (the directory is created when it is missing) and a
     * previously generated report for the current print media is removed.
     * <p>
     * When a logical start marker is set, reading begins at the position
     * returned by <code>tail</code> instead of at the beginning of the file.
     *
     * @param benchmarkFile
     *          An object of type File representing the input benchmark file.
     * @param report
     *          A file handle to the report file to be written; used here only to
     *          locate and delete a previously generated report.
     * @throws BenchmarkReportFileAccessException
     *           if the temporary directory or its files cannot be prepared, or
     *           if any I/O error occurs while reading the input benchmark file
     *           or while writing the split files.
     * @throws BenchmarkReportInputFileFormatException
     *           if a START entry has fewer than three whitespace separated
     *           tokens, or if the logical start marker cannot be located.
     */
    private void splitBenchmarkFile(File benchmarkFile, File report)
            throws BenchmarkReportFileAccessException, BenchmarkReportInputFileFormatException {
        File dir = temporaryDirectory;
        // Folder already exists; then delete all files in the temporary folder
        if (dir.isDirectory()) {
            File[] files = dir.listFiles();
            // listFiles() yields null when the directory cannot be read
            if (files == null) {
                throw new BenchmarkReportFileAccessException(
                        "Could not list files in the folder \"" + temporaryDirectory + "\"");
            }
            for (int count = 0; count < files.length; count++) {
                if (!files[count].delete()) {
                    throw new BenchmarkReportFileAccessException(
                            "Could not delete files in the folder \"" + temporaryDirectory + "\"");
                }
            }
        } else if (!dir.mkdir()) {
            throw new BenchmarkReportFileAccessException(
                    "Could not create  temporary folder \"" + temporaryDirectory + "\"");
        }

        // delete report2 from the filesystem
        if (getPrintMedia().equalsIgnoreCase(MEDIA_TEXT)) {
            deleteFile(new File(report.getAbsolutePath() + ".txt"));
        } else if (getPrintMedia().equalsIgnoreCase(MEDIA_HTML)) {
            deleteFile(new File(report.getAbsolutePath() + ".html"));
        }

        RandomAccessFile in = null;
        BufferedWriter out = null;
        try {
            String logEntry;
            long fromPos = 0;

            if (getLogicalStart() != null) {
                fromPos = tail(benchmarkFile, FILE_CHUNK_SIZE);
            }
            in = new RandomAccessFile(benchmarkFile, "r");

            if (getLogicalStart() != null) {
                in.seek(fromPos);
            }
            ArrayList<String> startTokens = new ArrayList<String>();
            // pipeline whose file "out" is currently appending to
            String lastStart = "";
            Pattern pattern = Pattern.compile("(\\d+) (\\d+) (.*) (.*) \\{(.*)\\}");
            while ((logEntry = in.readLine()) != null) {
                Matcher matcher = pattern.matcher(logEntry);
                if (logEntry.matches(".*START.*")) {
                    String[] splittedStartEntry = logEntry.split("\\s");
                    if (splittedStartEntry.length <= 2) {
                        throw new BenchmarkReportInputFileFormatException(getBenchmarkFile() + " is invalid.");
                    }
                    String startToken = splittedStartEntry[2];

                    if (startToken.endsWith("Start")) {
                        continue;
                    }
                    if (!startTokens.contains(startToken)) {
                        // create a new file for the new pipeline
                        startTokens.add(startToken);
                        File pipelineFile = new File(temporaryDirectory, startToken + "_benchmark.txt");
                        if (!pipelineFile.createNewFile()) {
                            throw new BenchmarkReportFileAccessException("Could not create \"" + startToken
                                    + "_benchmark.txt" + "\" in directory named \"" + temporaryDirectory + "\"");
                        }
                        // The START line is written through a short-lived writer so
                        // that "out" (and lastStart) keep pointing at the pipeline
                        // file currently being appended to. Reassigning "out" here
                        // would drop its unflushed entries and send later entries
                        // of the previous pipeline to the wrong file.
                        BufferedWriter headerWriter = new BufferedWriter(new FileWriter(pipelineFile));
                        try {
                            headerWriter.write(logEntry);
                            headerWriter.newLine();
                        } finally {
                            headerWriter.close();
                        }
                    }
                }
                // if a valid benchmark entry then write it to the pipeline specific
                // file
                if (matcher.matches() && (validateLogEntry(matcher.group(3), startTokens)
                        || logEntry.matches(".*documentLoaded.*"))) {
                    String pipelineToken = matcher.group(3).split("\\.")[0];
                    if (!lastStart.equals(pipelineToken)) {
                        // switching pipeline: flush the current file, then append
                        // to the file of the pipeline this entry belongs to
                        if (out != null) {
                            out.close();
                        }
                        File pipelineFile = new File(temporaryDirectory, pipelineToken + "_benchmark.txt");
                        out = new BufferedWriter(new FileWriter(pipelineFile, true));
                    }
                    if (out != null) {
                        out.write(logEntry);
                        out.newLine();
                    }
                    lastStart = pipelineToken;
                }
            }
            // Close explicitly so that a failure to flush the last entries is
            // reported instead of being silently ignored in the finally block.
            if (out != null) {
                out.close();
            }
        } catch (IOException e) {
            BenchmarkReportFileAccessException failure = new BenchmarkReportFileAccessException(
                    "Could not split " + benchmarkFile.getAbsolutePath()
                            + " into pipeline specific files: " + e.getMessage());
            failure.initCause(e);
            throw failure;
        } finally {
            // each handle is released independently of the other
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }
    }

    /**
     * Scans the given file backwards, in chunks of <code>chunkSize</code> bytes,
     * looking for a line that ends with the logical start marker.
     * <p>
     * The final byte of the file is never examined, because scanning starts at
     * <code>length - 1</code>.
     *
     * @param fileToBeRead
     *          An object of the file to be read.
     * @param chunkSize
     *          An integer specifying the size of the chunks in which file will be
     *          read.
     * @return A long value pointing to the start position of the file chunk in
     *         which the marker was found (0 when it was found in the first
     *         chunk of the file), or -1 if an I/O error occurred while reading.
     * @throws BenchmarkReportInputFileFormatException
     *           if the file does not contain the logical start marker.
     */
    private long tail(File fileToBeRead, int chunkSize) throws BenchmarkReportInputFileFormatException {
        RandomAccessFile raf = null;
        try {
            raf = new RandomAccessFile(fileToBeRead, "r");
            Vector<String> lastNlines = new Vector<String>();
            boolean markerFound = false;
            // clamp to 0 so that an empty file does not produce a negative
            // array size below
            long curPos = Math.max(raf.length() - 1, 0);
            long fromPos;
            byte[] bytearray;
            while (true) {
                fromPos = curPos - chunkSize;
                if (fromPos <= 0) {
                    // the remaining part of the file fits in one chunk: read
                    // everything from the beginning up to curPos
                    raf.seek(0);
                    bytearray = new byte[(int) curPos];
                    raf.readFully(bytearray);
                    markerFound = parseLinesFromLast(bytearray, lastNlines, fromPos);
                    fromPos = 0;
                    break;
                }
                raf.seek(fromPos);
                bytearray = new byte[chunkSize];
                raf.readFully(bytearray);
                if (parseLinesFromLast(bytearray, lastNlines, fromPos)) {
                    markerFound = true;
                    break;
                }
                // The first line of a chunk may be cut in the middle; drop it and
                // make the next chunk end right after it so it is re-read whole.
                // NOTE(review): a chunk without any line separator makes delta
                // equal to chunkSize, so curPos would not move - confirm that
                // lines are always shorter than the chunk size.
                int delta = lastNlines.get(lastNlines.size() - 1).length();
                lastNlines.remove(lastNlines.size() - 1);
                curPos = fromPos + delta;
            }
            // Report a missing marker even when the scan stopped exactly at
            // offset 0, which previously returned 0 as if it had been found.
            if (!markerFound) {
                throw new BenchmarkReportInputFileFormatException(
                        getBenchmarkFile() + " does not contain a marker named " + getLogicalStart()
                                + " indicating logical start of a run.");
            }
            return fromPos;

        } catch (IOException e) {
            e.printStackTrace();
            return -1;
        } finally {
            IOUtils.closeQuietly(raf);
        }
    }

    /**
     * Extracts the lines of a file part, last line first, and appends them to
     * the given vector until a line ending with the logical start marker is
     * met. The line carrying the marker is appended as well.
     *
     * @param bytearray
     *          A part of a file being read upside down.
     * @param lastNlines
     *          A vector receiving the lines extracted from the file part.
     * @param fromPos
     *          A long value indicating the start of a file part (not used by
     *          this method).
     *
     * @return true if marker indicating the logical start of run is found; false
     *         otherwise.
     */
    private boolean parseLinesFromLast(byte[] bytearray, Vector<String> lastNlines, long fromPos) {
        // Reversing the whole text makes the tokenizer hand out the lines from
        // the last one to the first one; every token is reversed back below.
        String reversedChunk = new StringBuilder(new String(bytearray)).reverse().toString();
        for (StringTokenizer reversedLines = new StringTokenizer(reversedChunk, NL); reversedLines
                .hasMoreTokens();) {
            String line = new StringBuilder(reversedLines.nextToken()).reverse().toString();
            lastNlines.add(line);
            if (line.trim().endsWith(getLogicalStart())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prints the command line usage message to standard output.
     */
    public static void usage() {
        StringBuilder help = new StringBuilder();
        help.append("Usage: java gate.util.reporting.DocTimeReporter [Options]").append(NL);
        help.append("\t Options:").append(NL);
        help.append("\t -i input file path (default: benchmark.txt in the execution directory)").append(NL);
        help.append("\t -m print media - html/text (default: html)").append(NL);
        help.append("\t -d number of docs, use -1 for all docs (default: 10 docs)").append(NL);
        help.append("\t -p processing resource name to be matched (default: all_prs)").append(NL);
        help.append("\t -o output file path (default: report.html/txt in the system temporary directory)")
                .append(NL);
        help.append("\t -l logical start (not set by default)").append(NL);
        help.append("\t -h show help").append(NL);
        System.out.println(help.toString());
    }

    /**
     * Entry point used when the report is executed from the command line:
     * builds a reporter from the arguments and generates the report.
     *
     * @param args
     *          A string array containing the command line arguments.
     * @throws BenchmarkReportInputFileFormatException
     *           propagated from the report generation.
     * @throws BenchmarkReportFileAccessException
     *           propagated from the report generation.
     */
    public static void main(String[] args)
            throws BenchmarkReportInputFileFormatException, BenchmarkReportFileAccessException {
        // the command-line options are handed to the constructor
        new DocTimeReporter(args).generateReport();
    }

    /**
     * Generates the actual report: splits the benchmark file into pipeline
     * specific temporary files, then calls store, calculate and printReport for
     * each of them and finally removes the temporary files.
     * <p>
     * While the report is generated a timer task checks the benchmark file for
     * modifications every second. When no report file has been set, the report
     * is written to "report.html" or "report.txt" in the system temporary
     * directory.
     *
     * @throws BenchmarkReportInputFileFormatException
     *           if the benchmark file contains no valid log entries, or
     *           propagated from the splitting step.
     * @throws BenchmarkReportFileAccessException
     *           if the temporary directory cannot be listed, or propagated from
     *           the splitting step.
     */
    private void generateReport()
            throws BenchmarkReportInputFileFormatException, BenchmarkReportFileAccessException {
        Timer timer = null;
        try {
            // NOTE(review): onChange is invoked from the timer task, so this
            // exception is raised on the timer thread rather than in the caller
            // of generateReport - confirm that this is the intended behaviour.
            TimerTask task = new FileWatcher(getBenchmarkFile()) {
                @Override
                protected void onChange(File file) {
                    throw new BenchmarkReportExecutionException(
                            getBenchmarkFile() + " file has been modified while generating the report.");
                }
            };
            timer = new Timer();
            // repeat the check every second
            timer.schedule(task, new Date(), 1000);

            if (reportFile == null) {
                reportFile = new File(System.getProperty("java.io.tmpdir"),
                        "report." + ((printMedia.equals(MEDIA_HTML)) ? "html" : "txt"));
            }
            splitBenchmarkFile(getBenchmarkFile(), reportFile);
            if (validEntries == 0) {
                if (logicalStart != null) {
                    throw new BenchmarkReportInputFileFormatException("No valid log entries present in "
                            + getBenchmarkFile() + " does not contain a marker named " + logicalStart + ".");
                } else {
                    throw new BenchmarkReportInputFileFormatException(
                            "No valid log entries present in " + getBenchmarkFile().getAbsolutePath());
                }
            }
            File dir = temporaryDirectory;
            if (dir.isDirectory()) {
                File[] files = dir.listFiles();
                // listFiles() yields null when the directory cannot be read
                if (files == null) {
                    throw new BenchmarkReportFileAccessException(
                            "Could not list files in the folder \"" + temporaryDirectory + "\"");
                }
                // one report section per pipeline specific temporary file
                for (File inFile : files) {
                    Object storedContainer = store(inFile);
                    Object calculatedContainer = calculate(storedContainer);
                    printReport(calculatedContainer, reportFile);
                }
                // Remove every temporary file once all of them are processed;
                // previously only the first one was deleted and the others were
                // left behind. A failed deletion is reported but not fatal.
                for (File processed : files) {
                    if (processed.exists() && !processed.delete()) {
                        System.err.println(processed + " was not possible to delete.");
                    }
                }
            }
        } finally {
            if (timer != null) {
                timer.cancel();
            }
        }
    }

    /**
     * Executes the report by delegating to the report generation of this
     * reporter.
     *
     * @see gate.util.reporting.BenchmarkReportable#executeReport()
     */
    @Override
    public void executeReport() throws BenchmarkReportInputFileFormatException, BenchmarkReportFileAccessException {
        this.generateReport();
    }

    /**
     * Gives the marker that indicates the logical start of a run.
     *
     * @return logicalStart A String containing the marker indicating logical
     *         start of a run.
     */
    public String getLogicalStart() {
        return this.logicalStart;
    }

    /**
     * Optionally sets a string indicating the logical start of a run.
     *
     * @param logicalStart A String indicating the logical start of a run.
     * Useful when you have marked different runs in your benchmark file
     * with this string at their start.
     */
    public void setLogicalStart(String logicalStart) {
        final String marker = logicalStart;
        this.logicalStart = marker;
    }

    /**
     * Gives the input benchmark file the report is generated from.
     *
     * @return benchmarkFile path to input benchmark file.
     * @see #setBenchmarkFile(java.io.File)
     */
    public File getBenchmarkFile() {
        return this.benchmarkFile;
    }

    /**
     * Sets the input benchmark file from which the report is generated.
     * Unless changed, the file named "benchmark.txt" is used.
     *
     * @param benchmarkFile Input benchmark file.
     */
    public void setBenchmarkFile(File benchmarkFile) {
        final File input = benchmarkFile;
        this.benchmarkFile = input;
    }

    /**
     * Gives the file the report is written to.
     *
     * @return reportFile file path where the report file is written.
     * @see #setReportFile(java.io.File)
     */
    public File getReportFile() {
        return this.reportFile;
    }

    /**
     * Sets the file the report is written to. If not set, the default is the
     * file name "report.txt/html" in the system temporary directory.
     *
     * @param reportFile file path to the report file to write.
     */
    public void setReportFile(File reportFile) {
        final File target = reportFile;
        this.reportFile = target;
    }

    /**
     * Gives the maximum number of documents to be shown in the report.
     *
     * @return maxDocumentInReport An integer specifying the maximum no of
     *         documents to be shown in the report.
     */
    public int getMaxDocumentInReport() {
        return this.maxDocumentInReport;
    }

    /**
     * Sets the maximum number of documents contained in the report.
     *
     * @param maxDocumentInReport Maximum number of documents contained in
     * the report; must be positive or the constant ALL_DOCS, which requests
     * all documents. The default is 10.
     * @throws IllegalArgumentException if the value is neither positive nor
     * ALL_DOCS.
     */
    public void setMaxDocumentInReport(int maxDocumentInReport) {
        // reject anything that is neither a positive count nor ALL_DOCS
        if (maxDocumentInReport <= 0 && maxDocumentInReport != ALL_DOCS) {
            throw new IllegalArgumentException("Illegal argument: " + maxDocumentInReport);
        }
        this.maxDocumentInReport = maxDocumentInReport;
    }

    /**
     * Gives the search string that is matched against the PR names present in
     * the log entries.
     *
     * @return PRMatchingRegex A String to be matched to PR names present in the
     *         log entries.
     */
    public String getPRMatchingRegex() {
        return this.PRMatchingRegex;
    }

    /**
     * Sets the search string used to match PR names present in the benchmark
     * file.
     *
     * @param matchingRegex regular expression to match PR names
     * present in the benchmark file. The default is MATCH_ALL_PR_REGEX.
     */
    public void setPRMatchingRegex(String matchingRegex) {
        this.PRMatchingRegex = matchingRegex;
    }
}

/**
 * A FileWatcher is a timer task that, on every run, compares the current
 * last-modified time of a file with the one recorded when the watcher was
 * created and reports a difference through {@link #onChange(File)}.
 */
abstract class FileWatcher extends TimerTask {
    /** The file being watched. */
    private final File file;
    /** Last-modified time of the file at the moment the watcher was created. */
    private final long timeStamp;

    /**
     * Creates a FileWatcher on a given file and records its current
     * last-modified time.
     *
     * @param file
     *          A handle of the file to be watched.
     */
    public FileWatcher(File file) {
        this.file = file;
        this.timeStamp = file.lastModified();
    }

    /**
     * Checks the watched file once. When its last-modified time differs from
     * the recorded one, this task is cancelled and {@link #onChange(File)} is
     * invoked.
     *
     * @see java.util.TimerTask#run()
     */
    @Override
    public final void run() {
        long currentTimeStamp = file.lastModified();
        if (currentTimeStamp == timeStamp) {
            // nothing changed since the watcher was created
            return;
        }
        cancel();
        onChange(file);
    }

    /**
     * Specifies the actions to be taken when a file is modified.
     *
     * @param file
     *          A handle of the file to be watched.
     */
    protected abstract void onChange(File file) throws BenchmarkReportExecutionException;
}