Source code

Java tutorial


Here is the source code for org.transitime.utils.csv.CsvBaseReader.java


/*
 * This file is part of Transitime.org
 *
 * Transitime.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (GPL) as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * Transitime.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Transitime.org .  If not, see <http://www.gnu.org/licenses/>.
 */
package org.transitime.utils.csv;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.transitime.utils.IntervalTimer;
import org.transitime.utils.Time;

 * For parsing a CSV file. Does all of the hard work. This class is
 * abstract because it needs to be subclassed to read in specific
 * CSV file type.
 * @author SkiBu Smith
public abstract class CsvBaseReader<T> {

    // Full file name of CSV file to be read
    private final String fileName;

    // Keeps track whether this file is required or not as per
    // the CSV spec. 
    private final boolean required;

    // Whether file is a supplemental one or not. For supplemental
    // files some of elements specified as required in the CSV
    // spec can actually be missing since the data from supplemental
    // file is going to be combined with the main file.
    private final boolean supplemental;

    // The CSV objects read from the file
    protected List<T> gtfsObjects;

    protected static final Logger logger = LoggerFactory.getLogger(CsvBaseReader.class);

    /********************** Member Functions **************************/

     * Constructor. Stores the file name to be used.
     * @param dirName
     * @param fileName
     * @param required
     * @param supplemental
    protected CsvBaseReader(String dirName, String fileName, boolean required, boolean supplemental) {
        this.fileName = dirName + "/" + fileName;
        this.required = required;
        this.supplemental = supplemental;

     * Constructor with fewer params. More useful for non-CSV files. 
     * Sets required to true and supplemental to false.
     * @param fileName
    protected CsvBaseReader(String fileName) {
        this.fileName = fileName;
        this.required = true;
        this.supplemental = false;

     * Called for every record in file. Must be overridden by subclass since an
     * object of the appropriate type needs to be created.
     * @param record
     * @return The created GTFS object, or null if object filtered out
    abstract protected T handleRecord(CSVRecord record, boolean supplemental)
            throws ParseException, NumberFormatException;

     * Parse the CSV file. Reads in the header info and then each line. Calls
     * the abstract handleRecord() method for each record. Adds each resulting
     * CSV object to the gtfsObjecgts array.
    private void parse() {
        CSVRecord record = null;
        try {
            IntervalTimer timer = new IntervalTimer();

            logger.debug("Parsing CSV file {} ...", fileName);

            // Open the file for reading. Use UTF-8 format since that will work
            // for both regular ASCII format and UTF-8 extended format files 
            // since UTF-8 was designed to be backwards compatible with ASCII. 
            // This way will work for Chinese and other character sets. Use
            // InputStreamReader so can specify that using UTF-8 format. Use
            // BufferedReader so that can determine if first character is an
            // optional BOM (Byte Order Mark) character used to indicate that 
            // file is in UTF-8 format. BufferedReader allows us to read in
            // first character and then discard if it is a BOM character or
            // reset the reader to back to the beginning if it is not. This
            // way the CSV parser will process the file starting with the first
            // true character.         
            Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8"));

            // Deal with the possible BOM character at the beginning of the file
            int firstRead =;
            final int BOM_CHARACTER = 0xFEFF;
            if (firstRead != BOM_CHARACTER)

            // Get ready to parse the CSV file.
            // Allow lines to be comments if they start with "-" so that can
            // easily comment out problems and also test what happens when
            // certain data is missing. Using the '-' character so can
            // comment out line that starts with "--", which is what is 
            // used for SQL. 
            CSVFormat formatter = CSVFormat.DEFAULT.withHeader().withCommentMarker('-');

            // Parse the file
            Iterable<CSVRecord> records = formatter.parse(in);

            logger.debug("Finished CSV parsing of file {}. Took {} msec.", fileName, timer.elapsedMsec());

            int lineNumberWhenLogged = 0;
            timer = new IntervalTimer();
            IntervalTimer loggingTimer = new IntervalTimer();

            Iterator<CSVRecord> iterator = records.iterator();
            while (iterator.hasNext()) {
                // Determine the record to process
                record =;

                // If blank line then skip it. This way avoid error messages since
                // expected data column won't exist
                if (record.size() == 0)

                // Process the record using appropriate handler
                // and create the corresponding CSV object
                T gtfsObject;
                try {
                    gtfsObject = handleRecord(record, supplemental);
                } catch (ParseException e) {
                    logger.error("ParseException occurred for record {} "
                            + "(comment lines not included when determing record #) for " + "filename {} . {}",
                            record.getRecordNumber(), fileName, e.getMessage());

                    // Continue even though there was an error so that all errors 
                    // logged at once.               
                } catch (NumberFormatException e) {
                    logger.error("NumberFormatException occurred for record {} "
                            + "(comment lines not included when determing record #) " + "for filename {} . {}",
                            record.getRecordNumber(), fileName, e.getMessage());

                    // Continue even though there was an error so that all errors 
                    // logged at once.               

                // Add the newly created CSV object to the object list
                if (gtfsObject != null)

                // Log info if it has been a while. Check only every 20,000
                // lines to see if the 10 seconds has gone by. If so, then log
                // number of lines. By only looking at timer every 20,000 lines
                // not slowing things down by for every line doing system call 
                // for to get current time.
                final int LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG = 20000;
                final long SECONDS_ELSAPSED_UNTIL_SHOULD_LOG = 5;
                if (record.getRecordNumber() >= lineNumberWhenLogged
                    lineNumberWhenLogged = (int) record.getRecordNumber();
                    if (loggingTimer.elapsedMsec() > SECONDS_ELSAPSED_UNTIL_SHOULD_LOG * Time.MS_PER_SEC) {
              "  Processed {} lines. Took {} msec...", lineNumberWhenLogged,
                        loggingTimer = new IntervalTimer();
            } // End of while iterating over records

            // Close up the file reader

            // Determine number of records for logging message
            long numberRecords = 0;
            if (record != null)
                numberRecords = record.getRecordNumber();

  "Finished parsing {} records from file {} . Took {} msec.", numberRecords, fileName,
        } catch (FileNotFoundException e) {
            if (required)
                logger.error("Required CSV file {} not found.", fileName);
      "CSV file {} not found but OK because this file " + "not required.", fileName);
        } catch (IOException e) {
            logger.error("IOException occurred when reading in filename {}.", fileName, e);

     * The way one gets the list of CSV objects. Uses default size for creating
     * ArrayList of 100.
     * @return List of CSV objects. Can be empty but not null.
    public List<T> get() {
        return get(100);

     * The way one gets the list of CSV objects.
     * @param initialSize
     *            Initial size of array that returns the objects. For when
     *            expect a really large array, such as for stop_times then can
     *            initialize to large value.
     * @return List of CSV objects. Can be empty but not null.
    public List<T> get(int initialSize) {
        gtfsObjects = new ArrayList<T>(initialSize);


        return gtfsObjects;

     * @return the file name of the file being processed
    public String getFileName() {
        return fileName;