Usage examples for the org.apache.commons.csv CSVFormat.parse(Reader) method.
public CSVParser parse(final Reader in) throws IOException
From source file: org.transitime.custom.sfmta.delayTimes.Loc.java
public static List<Loc> readLocs(String fileName) { List<Loc> locs = new ArrayList<Loc>(); try {// www. jav a 2 s . c om Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); CSVFormat formatter = CSVFormat.DEFAULT.withHeader().withCommentMarker('-'); // Parse the file Iterable<CSVRecord> records = formatter.parse(in); Iterator<CSVRecord> iterator = records.iterator(); while (iterator.hasNext()) { // Determine the record to process CSVRecord record = iterator.next(); Loc loc = getLoc(record); if (loc.accuracy < MAX_ALLOWED_ACCURACY) locs.add(loc); } } catch (Exception e) { e.printStackTrace(); } return locs; }
From source file: org.transitime.utils.csv.CsvBaseReader.java
/** * Parse the CSV file. Reads in the header info and then each line. Calls * the abstract handleRecord() method for each record. Adds each resulting * CSV object to the gtfsObjecgts array. */// ww w . ja va 2 s .c om private void parse() { CSVRecord record = null; try { IntervalTimer timer = new IntervalTimer(); logger.debug("Parsing CSV file {} ...", fileName); // Open the file for reading. Use UTF-8 format since that will work // for both regular ASCII format and UTF-8 extended format files // since UTF-8 was designed to be backwards compatible with ASCII. // This way will work for Chinese and other character sets. Use // InputStreamReader so can specify that using UTF-8 format. Use // BufferedReader so that can determine if first character is an // optional BOM (Byte Order Mark) character used to indicate that // file is in UTF-8 format. BufferedReader allows us to read in // first character and then discard if it is a BOM character or // reset the reader to back to the beginning if it is not. This // way the CSV parser will process the file starting with the first // true character. Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); // Deal with the possible BOM character at the beginning of the file in.mark(1); int firstRead = in.read(); final int BOM_CHARACTER = 0xFEFF; if (firstRead != BOM_CHARACTER) in.reset(); // Get ready to parse the CSV file. // Allow lines to be comments if they start with "-" so that can // easily comment out problems and also test what happens when // certain data is missing. Using the '-' character so can // comment out line that starts with "--", which is what is // used for SQL. CSVFormat formatter = CSVFormat.DEFAULT.withHeader().withCommentMarker('-'); // Parse the file Iterable<CSVRecord> records = formatter.parse(in); logger.debug("Finished CSV parsing of file {}. 
Took {} msec.", fileName, timer.elapsedMsec()); int lineNumberWhenLogged = 0; timer = new IntervalTimer(); IntervalTimer loggingTimer = new IntervalTimer(); Iterator<CSVRecord> iterator = records.iterator(); while (iterator.hasNext()) { // Determine the record to process record = iterator.next(); // If blank line then skip it. This way avoid error messages since // expected data column won't exist if (record.size() == 0) continue; // Process the record using appropriate handler // and create the corresponding CSV object T gtfsObject; try { gtfsObject = handleRecord(record, supplemental); } catch (ParseException e) { logger.error("ParseException occurred for record {} " + "(comment lines not included when determing record #) for " + "filename {} . {}", record.getRecordNumber(), fileName, e.getMessage()); // Continue even though there was an error so that all errors // logged at once. continue; } catch (NumberFormatException e) { logger.error("NumberFormatException occurred for record {} " + "(comment lines not included when determing record #) " + "for filename {} . {}", record.getRecordNumber(), fileName, e.getMessage()); // Continue even though there was an error so that all errors // logged at once. continue; } // Add the newly created CSV object to the object list if (gtfsObject != null) gtfsObjects.add(gtfsObject); // Log info if it has been a while. Check only every 20,000 // lines to see if the 10 seconds has gone by. If so, then log // number of lines. By only looking at timer every 20,000 lines // not slowing things down by for every line doing system call // for to get current time. 
final int LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG = 20000; final long SECONDS_ELSAPSED_UNTIL_SHOULD_LOG = 5; if (record.getRecordNumber() >= lineNumberWhenLogged + LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG) { lineNumberWhenLogged = (int) record.getRecordNumber(); if (loggingTimer.elapsedMsec() > SECONDS_ELSAPSED_UNTIL_SHOULD_LOG * Time.MS_PER_SEC) { logger.info(" Processed {} lines. Took {} msec...", lineNumberWhenLogged, timer.elapsedMsec()); loggingTimer = new IntervalTimer(); } } } // End of while iterating over records // Close up the file reader in.close(); // Determine number of records for logging message long numberRecords = 0; if (record != null) numberRecords = record.getRecordNumber(); logger.info("Finished parsing {} records from file {} . Took {} msec.", numberRecords, fileName, timer.elapsedMsec()); } catch (FileNotFoundException e) { if (required) logger.error("Required CSV file {} not found.", fileName); else logger.info("CSV file {} not found but OK because this file " + "not required.", fileName); } catch (IOException e) { logger.error("IOException occurred when reading in filename {}.", fileName, e); } }
From source file: permafrost.tundra.data.IDataCSVParser.java
/**
 * Returns an IData representation of the CSV data in the given input stream.
 *
 * @param inputStream The input stream to be decoded.
 * @param charset The character set to use.
 * @return An IData representation of the given input stream data.
 * @throws IOException If there is a problem reading from the stream.
 */
@Override
public IData decode(InputStream inputStream, Charset charset) throws IOException {
    if (inputStream == null)
        return null;

    Reader reader = new InputStreamReader(inputStream, CharsetHelper.normalize(charset));
    // Header row gives named column access; empty fields decode to null.
    CSVFormat format = CSVFormat.DEFAULT.withHeader().withDelimiter(delimiter).withNullString("");

    List<IData> list = new ArrayList<IData>();

    // try-with-resources closes the parser (and its underlying reader) once
    // all records are consumed; previously neither was ever closed.
    try (CSVParser parser = format.parse(reader)) {
        Set<String> keys = parser.getHeaderMap().keySet();

        for (CSVRecord record : parser) {
            IData document = IDataFactory.create();
            IDataCursor cursor = document.getCursor();

            for (String key : keys) {
                // isSet guards against rows shorter than the header; the
                // null check skips fields that were empty in the source.
                if (record.isSet(key)) {
                    String value = record.get(key);
                    if (value != null)
                        IDataUtil.put(cursor, key, value);
                }
            }

            cursor.destroy();
            list.add(document);
        }
    }

    IData output = IDataFactory.create();
    IDataCursor cursor = output.getCursor();
    IDataUtil.put(cursor, "recordWithNoID", list.toArray(new IData[list.size()]));

    return output;
}
From source file: umich.ms.batmass.filesupport.files.types.mzrt.model.MzrtFile.java
/**
 * Loads the mz/rt feature file: auto-detects the record separator and the
 * field delimiter, then parses the CSV and resolves the required header
 * columns into {@code indexesMzRtColorOpacity}.
 *
 * @throws DataLoadingException if the file cannot be read, the separator or
 *         delimiter cannot be detected, or a required header column is missing
 */
public void load() throws DataLoadingException {
    // Counts of each candidate line terminator seen in the file:
    // [0] - \r\n, [1] - \n, [2] - \r
    int[] counts = new int[3];
    final String[] separators = { "\r\n", "\n", "\r" };

    // Pass 1: detect the line separator by counting terminator occurrences
    // character-by-character (stops early once any count exceeds the cutoff).
    try (InputStreamReader isr = new InputStreamReader(
            new BufferedInputStream(new FileInputStream(file.toFile())), Charsets.UTF_8)) {
        int c;
        int encountered = 0; // NOTE(review): unused; kept to preserve code verbatim
        boolean isPrevR = false; // whether the previous char was '\r'
        int cutoff = 50; // enough samples to decide; no need to scan the whole file
        readLoop: while ((c = isr.read()) != -1) {
            char ch = (char) c;
            switch (ch) {
            case '\r':
                // Tentatively count a bare '\r'; corrected below if '\n' follows.
                if (++counts[2] > cutoff) {
                    break readLoop;
                }
                isPrevR = true;
                break;
            case '\n':
                if (isPrevR) {
                    // '\r' immediately before this '\n': it was really a "\r\n"
                    // pair, so undo the bare-'\r' count and count "\r\n" instead.
                    counts[2]--;
                    if (++counts[0] > cutoff) {
                        break readLoop;
                    }
                } else {
                    if (++counts[1] > cutoff) {
                        break readLoop;
                    }
                }
                isPrevR = false;
                break;
            default:
                isPrevR = false;
            }
        }
    } catch (IOException ex) {
        throw new DataLoadingException("Could not detect line separator", ex);
    }

    // Collect the index (or indexes, on a tie) of the highest count.
    List<Integer> idxMax = new ArrayList<>();
    for (int i = 0; i < counts.length; i++) {
        if (idxMax.isEmpty()) {
            idxMax.add(i);
        } else if (counts[i] > counts[idxMax.get(0)]) {
            idxMax.clear();
            idxMax.add(i);
        } else if (counts[i] == counts[idxMax.get(0)]) {
            idxMax.add(i);
        }
    }
    // Tie-break preference order: "\r\n", then "\n", then "\r".
    String recordSeparator;
    if (idxMax.size() > 1) {
        if (idxMax.contains(0)) {
            recordSeparator = separators[0];
        } else if (idxMax.contains(1)) {
            recordSeparator = separators[1];
        } else {
            recordSeparator = separators[idxMax.get(0)];
        }
    } else {
        recordSeparator = separators[idxMax.get(0)];
    }

    // Pass 2: detect the delimiter from a sample of the first non-empty lines.
    char delimiter;
    try (BufferedReader br = new BufferedReader(new FileReader(file.toFile()))) {
        List<String> lines = new ArrayList<>();
        String line;
        int numTestLines = 10; // sample size for delimiter guessing
        while ((line = br.readLine()) != null) {
            if (!line.isEmpty()) {
                lines.add(line);
                if (lines.size() >= numTestLines)
                    break;
            }
        }
        delimiter = guessDelimiter(lines);
    } catch (IOException ex) {
        throw new DataLoadingException("Could not detect delimiter character", ex);
    }

    // Pass 3: parse the whole file with the detected format and resolve the
    // required (and optional) header columns.
    try (BufferedReader br = new BufferedReader(new FileReader(file.toFile()))) {
        CSVFormat fmt = CSVFormat.newFormat(delimiter);
        fmt = fmt.withHeader().withIgnoreEmptyLines(true).withTrim(true).withIgnoreHeaderCase(true)
                .withQuoteMode(QuoteMode.NON_NUMERIC).withRecordSeparator(recordSeparator).withQuote('"');
        CSVParser parser = fmt.parse(br);
        records = parser.getRecords();
        header = parser.getHeaderMap();

        // Required columns: mz low/high and rt low/high bounds.
        String[] colNames = { HEAD_MZLO, HEAD_MZHI, HEAD_RTLO, HEAD_RTHI };
        for (int i = 0; i < colNames.length; i++) {
            Integer index = header.get(colNames[i]);
            if (index == null)
                throw new DataLoadingException(String.format("Missing header column [%s]", colNames[i]));
            indexesMzRtColorOpacity[i] = index;
        }
        // Optional columns: color and opacity.
        Integer indexColor = header.get(HEAD_COLOR);
        if (indexColor != null && indexColor >= 0)
            indexesMzRtColorOpacity[4] = indexColor;
        Integer indexOpacity = header.get(HEAD_OPACITY);
        if (indexOpacity != null && indexOpacity >= 0)
            indexesMzRtColorOpacity[5] = indexOpacity;
    } catch (IOException ex) {
        throw new DataLoadingException(ex);
    }
}