Example usage for org.apache.commons.csv CSVParser CSVParser

List of usage examples for org.apache.commons.csv CSVParser CSVParser

Introduction

On this page you can find example usages of the org.apache.commons.csv.CSVParser constructor.

Prototype

public CSVParser(final Reader reader, final CSVFormat format) throws IOException 

Source Link

Document

Customized CSV parser using the given CSVFormat

If you do not read all records from the given reader, you should call close() on the parser, unless you close the reader.

Usage

From source file:GUI.ReadFile.java

/**
 * Reads phase definitions from a CSV file.
 *
 * <p>The file is parsed with {@code PHASE_HEADER_MAPPING} as its column names. The first
 * record returned by the parser (the header row stored in the file) is skipped and every
 * following record is converted into a {@code Phase}.
 *
 * @param fileName path of the CSV file to read
 * @return the phases in file order, or {@code null} if the file is missing, cannot be read,
 *         or contains a value that fails to parse
 */
public List<Phase> readPhase(String fileName) {
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(PHASE_HEADER_MAPPING);
    List<Phase> phase_list = new ArrayList<>();
    // try-with-resources closes the parser and the reader on every path, including when
    // parsing or Phase construction fails part-way through the file.
    try (FileReader fileReader = new FileReader(fileName);
            CSVParser csvFileParser = new CSVParser(fileReader, csvFileFormat)) {
        List<CSVRecord> csvRecords = csvFileParser.getRecords();
        // Start at index 1: record 0 is the header row contained in the file itself.
        for (int i = 1; i < csvRecords.size(); i++) {
            CSVRecord record = csvRecords.get(i);
            Phase p = new Phase(record.get(st_time), record.get(end_time), record.get(category));
            phase_list.add(p);
        }
    } catch (FileNotFoundException e) {
        System.out.println(fileName + " Phase file missing ...");
        return null;
    } catch (IOException ex) {
        System.out.println(fileName + " csv file error !!!");
        return null;
    } catch (ParseException ex) {
        System.out.println(fileName + " phase parsing error !!!");
        return null;
    }
    // Only reached when reading, parsing and closing all succeeded.
    System.out.println(fileName + " Phase file read!");
    return phase_list;
}

From source file:com.cotrino.langnet.GenerateVisualization.java

/**
 * Builds the languages file from a comparison summary.
 *
 * <p>For every language that appears in the {@code LanguageA} or {@code LanguageB} column of
 * the summary, the largest {@code ExecutedComparisons} value seen for it is kept and written
 * out as one {@code Language;Words;Family;} line (the family is always {@code Romance}).
 *
 * @param summaryFile   path of the CSV summary to read
 * @param languagesFile path of the file to write
 * @throws IOException if the summary cannot be read or the output cannot be written
 */
private void generateLanguages(String summaryFile, String languagesFile) throws IOException {

    HashMap<String, Integer> list = new HashMap<>();
    // try-with-resources releases the file even if a record fails to parse
    // (e.g. a non-numeric ExecutedComparisons value).
    try (Reader reader = new FileReader(summaryFile);
            CSVParser parser = new CSVParser(reader, csvFormat)) {
        for (CSVRecord record : parser) {
            String languageA = record.get("LanguageA");
            String languageB = record.get("LanguageB");
            int words = Integer.parseInt(record.get("ExecutedComparisons"));
            list.put(languageA, Math.max(words, list.getOrDefault(languageA, 0)));
            list.put(languageB, Math.max(words, list.getOrDefault(languageB, 0)));
        }
    }

    // StringBuilder avoids re-copying the whole content for every language.
    StringBuilder content = new StringBuilder("Language;Words;Family;\n");
    list.forEach((language, maxWords) ->
            content.append(language).append(';').append(maxWords).append(";Romance;\n"));
    IOUtil.write(languagesFile, content.toString());
}

From source file:com.edu.duke.URLResource.java

/**
 * Creates a <code>CSVParser</code> over the contents of this web page, using a caller-chosen
 * single-character delimiter and an optional header row.
 *
 * Every line of the page is expected to hold fields separated by the given delimiter. When
 * <code>withHeader</code> is true the first row supplies the column names; otherwise all rows
 * are treated as data.
 *
 * @param withHeader uses first row of data as a header row only if true
 * @param delimiter a single character that separates one field of data from another
 * @return a <code>CSVParser</code> that can provide access to the records in the web page one
 *         at a time
 */
public CSVParser getCSVParser(boolean withHeader, String delimiter) {
    boolean isSingleCharacter = delimiter != null && delimiter.length() == 1;
    if (!isSingleCharacter) {
        throw new ResourceException("URLResource: CSV delimiter must be a single character: " + delimiter);
    }
    try {
        // Pick the base format once, then apply the delimiter to whichever one was chosen.
        CSVFormat baseFormat = withHeader ? CSVFormat.EXCEL.withHeader() : CSVFormat.EXCEL;
        return new CSVParser(new StringReader(mySource), baseFormat.withDelimiter(delimiter.charAt(0)));
    } catch (Exception e) {
        throw new ResourceException("URLResource: cannot read " + myPath + " as a CSV file.");
    }
}

From source file:acmi.l2.clientmod.xdat.XdatEditor.java

/**
 * Registers every version listed in the bundled {@code /versions.csv} resource.
 *
 * <p>Each record supplies a version name (column 0) and a class name (column 1), which are
 * passed to {@code controller.registerVersion}. Any failure is logged and shown to the user
 * in a warning dialog; it is not propagated.
 */
private void loadSchema() {
    String versionsFilePath = "/versions.csv";
    try {
        java.io.InputStream versions = getClass().getResourceAsStream(versionsFilePath);
        if (versions == null) {
            // Without this check the failure is a NullPointerException with a null message,
            // which leaves the warning dialog without any explanation.
            throw new IllegalStateException(versionsFilePath + " not found on the classpath");
        }
        try (CSVParser parser = new CSVParser(new InputStreamReader(versions), CSVFormat.DEFAULT)) {
            for (CSVRecord record : parser.getRecords()) {
                String name = record.get(0);
                String className = record.get(1);
                controller.registerVersion(name, className);
            }
        }
    } catch (Exception e) {
        log.log(Level.WARNING, versionsFilePath + " read error", e);
        Dialogs.show(Alert.AlertType.WARNING, e.getClass().getSimpleName(), null, e.getMessage());
    }
}

From source file:edu.washington.gs.skyline.model.quantification.QuantificationTest.java

/**
 * Loads a CSV test resource and converts each record into an {@code InputRecord}.
 *
 * <p>The first row of the resource is used as the header.
 *
 * @param filename resource name, resolved relative to {@code QuantificationTest}
 * @return one {@code InputRecord} per data row, in file order
 * @throws IllegalArgumentException if the resource does not exist
 * @throws Exception if the resource cannot be read or a record cannot be converted
 */
private List<InputRecord> readInputRecords(String filename) throws Exception {
    java.io.InputStream stream = QuantificationTest.class.getResourceAsStream(filename);
    if (stream == null) {
        // Fail with the resource name instead of an unexplained NullPointerException.
        throw new IllegalArgumentException("Test resource not found: " + filename);
    }
    List<InputRecord> list = new ArrayList<>();
    // try-with-resources closes parser and reader, and keeps a failure from the loop as the
    // primary exception even if close() also fails.
    try (Reader reader = new InputStreamReader(stream);
            CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        for (CSVRecord record : parser.getRecords()) {
            list.add(new InputRecord(record));
        }
    }
    return list;
}

From source file:com.publictransitanalytics.scoregenerator.datalayer.directories.GTFSReadingStopTimesDirectory.java

/**
 * Reads a frequencies CSV (first row is the header) and adds one {@code FrequencyRecord}
 * per row to the builder, keyed by the row's {@code trip_id}.
 *
 * NOTE(review): the parser is not closed here; closing it would also close
 * {@code frequenciesReader}, which is presumably owned by the caller - confirm.
 *
 * @param builder receives a (trip id, frequency record) entry for every row
 * @param frequenciesReader source of the CSV text
 * @throws IOException if the CSV cannot be read
 */
private void parseFrequenciesFile(final ImmutableMultimap.Builder<String, FrequencyRecord> builder,
        final Reader frequenciesReader) throws FileNotFoundException, IOException {

    final CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader();
    final CSVParser parser = new CSVParser(frequenciesReader, headerFormat);

    // All rows are read up front, so nothing is added to the builder if parsing fails.
    for (CSVRecord row : parser.getRecords()) {
        final String tripId = row.get("trip_id");
        builder.put(tripId,
                new FrequencyRecord(tripId,
                        TransitTime.parse(row.get("start_time")),
                        TransitTime.parse(row.get("end_time")),
                        Duration.ofSeconds(Long.parseLong(row.get("headway_secs")))));
    }
}

From source file:com.publictransitanalytics.scoregenerator.datalayer.directories.GTFSReadingServiceTypeCalendar.java

/**
 * Applies the exceptions listed in a calendar-dates CSV (first row is the header) to the
 * service-type map.
 *
 * For each row, exception type {@code "1"} adds the row's {@code service_id} to its date and
 * {@code "2"} removes it; any other value aborts with a {@code ScoreGeneratorFatalException}.
 *
 * NOTE(review): the parser is not closed here; closing it would also close
 * {@code calendarDatesReader}, which is presumably owned by the caller - confirm.
 *
 * @param calendarDatesReader source of the CSV text
 * @param serviceTypesMap map from date to service ids, updated in place
 * @throws IOException if the CSV cannot be read
 */
private void parseCalendarDatesFile(final Reader calendarDatesReader,
        final Multimap<LocalDate, String> serviceTypesMap) throws FileNotFoundException, IOException {

    final CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader();
    final CSVParser parser = new CSVParser(calendarDatesReader, headerFormat);

    for (final CSVRecord row : parser.getRecords()) {
        final String serviceId = row.get("service_id");
        final LocalDate day = LocalDate.parse(row.get("date"), DateTimeFormatter.BASIC_ISO_DATE);
        final String exceptionType = row.get("exception_type");

        if (exceptionType.equals("1")) {
            serviceTypesMap.put(day, serviceId);
        } else if (exceptionType.equals("2")) {
            serviceTypesMap.remove(day, serviceId);
        } else {
            throw new ScoreGeneratorFatalException(String.format("Invalid exception type %s", exceptionType));
        }
    }
}

From source file:ca.nrc.cadc.tap.db.AsciiTableData.java

/**
 * Constructor.
 *
 * Wraps the stream in a CSV or TSV parser (chosen by content type), takes the first record
 * as the header, and records the trimmed header names as the column names.
 *
 * @param in The data stream
 * @param contentType The content type of the data
 * @throws IOException If a data handling error occurs
 * @throws UnsupportedOperationException If the content type is neither CSV nor TSV
 *         (including {@code null})
 * @throws IllegalArgumentException If the header contains no columns
 */
public AsciiTableData(InputStream in, String contentType) throws IOException {
    // Resolve the format with null-safe comparisons before touching the stream, so an
    // unsupported or null content type is reported as such rather than as a
    // NullPointerException.
    final CSVFormat format;
    if (TableContentHandler.CONTENT_TYPE_TSV.equals(contentType)) {
        format = CSVFormat.TDF.withFirstRecordAsHeader();
    } else if (TableContentHandler.CONTENT_TYPE_CSV.equals(contentType)) {
        format = CSVFormat.DEFAULT.withFirstRecordAsHeader();
    } else {
        throw new UnsupportedOperationException("contentType: " + contentType);
    }

    this.reader = new CSVParser(new InputStreamReader(in), format);
    this.rowIterator = reader.iterator();

    Map<String, Integer> header = reader.getHeaderMap();
    columnNames = new ArrayList<String>(header.size());
    for (String s : header.keySet()) {
        columnNames.add(s.trim());
        log.debug("found column: " + s);
    }
    if (columnNames.isEmpty()) {
        throw new IllegalArgumentException("No data columns.");
    }
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java

/**
 * Ingests a CSV stream in two passes and writes it to {@code finalOut} as tab-separated lines.
 *
 * <p>Setup: the header is read from {@code csvReader}; one character/discrete
 * {@code DataVariable} is created per header name and attached to {@code dataTable}.
 *
 * <p>First ("learning") pass: every value of every record is examined to decide, per column,
 * whether the column is numeric (and integer), a time, or a date. Each record is also copied
 * to a temporary file so the data can be read a second time.
 *
 * <p>Second pass: the temporary file is re-read and each record is written to
 * {@code finalOut} as one tab-joined line, with values formatted according to the type
 * detected for their column.
 *
 * <p>On normal completion {@code csvReader} and {@code finalOut} are closed and the temporary
 * file is deleted.
 * NOTE(review): there is no {@code finally}, so none of that cleanup happens when an
 * exception is thrown.
 *
 * @param csvReader source of the CSV text
 * @param dataTable receives the variables, the variable count and the case (record) count
 * @param finalOut destination for the tab-separated output; closed by this method
 * @return the number of records read in the second pass
 * @throws IOException if a header name is empty, a record's size does not match the header,
 *         the record counts of the two passes differ, or an I/O error occurs
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {

    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    Map<String, Integer> headers = parser.getHeaderMap();

    // i doubles as the column index in every loop below.
    int i = 0;
    for (String varName : headers.keySet()) {
        if (varName == null || varName.isEmpty()) {
            // TODO:
            // Add a sensible variable name validation algorithm.
            // -- L.A. 4.0 alpha 1
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
        }

        DataVariable dv = new DataVariable();
        dv.setName(varName);
        dv.setLabel(varName);
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:NOTCALCULATED");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);

        // Every variable starts out as character/discrete; numeric columns are
        // re-typed after the first pass.
        dv.setTypeCharacter();
        dv.setIntervalDiscrete();
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
        i++;
    }

    dataTable.setVarQuantity((long) variableList.size());
    dataTable.setDataVariables(variableList);

    // Per-column type flags, indexed by column position.
    boolean[] isNumericVariable = new boolean[headers.size()];
    boolean[] isIntegerVariable = new boolean[headers.size()];
    boolean[] isTimeVariable = new boolean[headers.size()];
    boolean[] isDateVariable = new boolean[headers.size()];

    for (i = 0; i < headers.size(); i++) {
        // OK, let's assume that every variable is numeric;
        // but we'll go through the file and examine every value; the
        // moment we find a value that's not a legit numeric one, we'll
        // assume that it is in fact a String.
        isNumericVariable[i] = true;
        isIntegerVariable[i] = true;
        isDateVariable[i] = true;
        isTimeVariable[i] = true;
    }

    // First, "learning" pass.
    // (we'll save the incoming stream in another temp file:)
    // The format that first matched a column's values; null until a value has matched.
    SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
    SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];

    File firstPassTempFile = File.createTempFile("firstpass-", ".csv");

    try (CSVPrinter csvFilePrinter = new CSVPrinter(
            // TODO allow other parsers of tabular data to use this parser by changing inFormat
            new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) {
        // Write headers
        csvFilePrinter.printRecord(headers.keySet());
        for (CSVRecord record : parser.getRecords()) {
            // Checks if #records = #columns in header
            if (!record.isConsistent()) {
                // NOTE(review): getRecords() has already been called when this runs, so the
                // line number reported here may not be the offending record's - confirm.
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }

            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                // A column stays "integer" only while every value is empty, "null", or
                // a character from firstNumCharSet followed by characters accepted by
                // StringUtils.isNumeric.
                isIntegerVariable[i] = isIntegerVariable[i] && varString != null
                        && (varString.isEmpty() || varString.equals("null")
                                || (firstNumCharSet.contains(varString.charAt(0))
                                        && StringUtils.isNumeric(varString.substring(1))));
                if (isNumericVariable[i]) {
                    // If variable might be "numeric" test to see if this value is a parsable number:
                    if (varString != null && !varString.isEmpty()) {

                        boolean isNumeric = false;
                        boolean isInteger = false;

                        if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                            // Special tokens are accepted as numeric; move on to the next column.
                            continue;
                        } else {
                            try {
                                // Only used as a parse check; the resulting value is not read.
                                Double testDoubleValue = new Double(varString);
                                // Parsed fine: still numeric, move on to the next column.
                                continue;
                            } catch (NumberFormatException ex) {
                                // the token failed to parse as a double
                                // so the column is a string variable.
                            }
                        }
                        isNumericVariable[i] = false;
                    }
                }

                // If this is not a numeric column, see if it is a date column
                // by parsing the cell as a date or date-time value:
                if (!isNumericVariable[i]) {

                    Date dateResult = null;

                    if (isTimeVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isTime = false;

                            if (selectedDateTimeFormat[i] != null) {
                                // A format was already chosen for this column; the value must
                                // match it completely.
                                ParsePosition pos = new ParsePosition(0);
                                dateResult = selectedDateTimeFormat[i].parse(varString, pos);

                                if (dateResult != null && pos.getIndex() == varString.length()) {
                                    // OK, successfully parsed a value!
                                    isTime = true;
                                }
                            } else {
                                // No format chosen yet: take the first one that consumes the
                                // whole value.
                                for (SimpleDateFormat format : TIME_FORMATS) {
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = format.parse(varString, pos);
                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                        selectedDateTimeFormat[i] = format;
                                        break;
                                    }
                                }
                            }
                            if (!isTime) {
                                isTimeVariable[i] = false;
                                // if the token didn't parse as a time value,
                                // we will still try to parse it as a date, below.
                                // unless this column is NOT a date.
                            } else {
                                // And if it is a time value, we are going to assume it's
                                // NOT a date.
                                isDateVariable[i] = false;
                            }
                        }
                    }

                    if (isDateVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isDate = false;

                            // TODO:
                            // Strictly speaking, we should be doing the same thing
                            // here as with the time formats above; select the
                            // first one that works, then insist that all the
                            // other values in this column match it... but we
                            // only have one, as of now, so it should be ok.
                            // -- L.A. 4.0 beta
                            for (SimpleDateFormat format : DATE_FORMATS) {
                                // Strict parsing - it will throw an
                                // exception if it doesn't parse!
                                // NOTE(review): this mutates the format objects held in
                                // DATE_FORMATS - presumably shared; confirm this is intended.
                                format.setLenient(false);
                                try {
                                    format.parse(varString);
                                    isDate = true;
                                    selectedDateFormat[i] = format;
                                    break;
                                } catch (ParseException ex) {
                                    //Do nothing
                                }
                            }
                            isDateVariable[i] = isDate;
                        }
                    }
                }
            }

            // Copy the record to the temp file for the second pass.
            csvFilePrinter.printRecord(record);
        }
    }
    dataTable.setCaseQuantity(parser.getRecordNumber());
    parser.close();
    csvReader.close();

    // Re-type the variables that we've determined are numerics:
    for (i = 0; i < headers.size(); i++) {
        if (isNumericVariable[i]) {
            dataTable.getDataVariables().get(i).setTypeNumeric();

            if (isIntegerVariable[i]) {
                dataTable.getDataVariables().get(i).setIntervalDiscrete();
            } else {
                dataTable.getDataVariables().get(i).setIntervalContinuous();
            }
        } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
            // Dates are still Strings, i.e., they are "character" and "discrete";
            // But we add special format values for them:
            // (the pattern recorded is that of DATE_FORMATS[0], not selectedDateFormat[i])
            dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("date");
        } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
            // Same for time values:
            dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("time");
        }
    }
    // Second, final pass.
    try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) {
        parser = new CSVParser(secondPassReader, inFormat.withHeader());
        // Reused for every record; each column is overwritten before the row is printed.
        String[] caseRow = new String[headers.size()];

        for (CSVRecord record : parser) {
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }

            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                if (isNumericVariable[i]) {
                    if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                        // Missing value - represented as an empty string in
                        // the final tab file
                        caseRow[i] = "";
                    } else if (varString.equalsIgnoreCase("NaN")) {
                        // "Not a Number" special value:
                        caseRow[i] = "NaN";
                    } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                        // Positive infinity:
                        caseRow[i] = "Inf";
                    } else if (varString.equalsIgnoreCase("-Inf")) {
                        // Negative infinity:
                        caseRow[i] = "-Inf";
                    } else if (varString.equalsIgnoreCase("null")) {
                        // By request from Gus - "NULL" is recognized as a
                        // numeric zero:
                        caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                    } else {
                        /* No re-formatting is done on any other numeric values.
                         * We'll save them as they were, for archival purposes.
                         * The alternative solution - formatting in sci. notation
                         * is commented-out below.
                         */
                        caseRow[i] = varString;
                        /*
                         if (isIntegerVariable[i]) {
                        try {
                            Integer testIntegerValue = new Integer(varString);
                            caseRow[i] = testIntegerValue.toString();
                        } catch (NumberFormatException ex) {
                            throw new IOException("Failed to parse a value recognized as an integer in the first pass! (?)");
                        }
                        } else {
                        try {
                            Double testDoubleValue = new Double(varString);
                            if (testDoubleValue.equals(0.0)) {
                                caseRow[i] = "0.0";
                            } else {
                                                                    // One possible implementation:
                                //
                                // Round our fractional values to 15 digits
                                // (minimum number of digits of precision guaranteed by
                                // type Double) and format the resulting representations
                                // in a IEEE 754-like "scientific notation" - for ex.,
                                // 753.24 will be encoded as 7.5324e2
                                BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext);
                                caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal);
                                
                                // Strip meaningless zeros and extra + signs:
                                caseRow[i] = caseRow[i].replaceFirst("00*e", "e");
                                caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e");
                                caseRow[i] = caseRow[i].replaceFirst("e\\+00", "");
                                caseRow[i] = caseRow[i].replaceFirst("^\\+", "");
                            }
                        } catch (NumberFormatException ex) {
                            throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)");
                        }
                        }
                         */
                    }
                } else if (isTimeVariable[i] || isDateVariable[i]) {
                    // Time and Dates are stored NOT quoted (don't ask).
                    if (varString != null) {
                        // Dealing with quotes:
                        // remove the leading and trailing quotes, if present:
                        varString = varString.replaceFirst("^\"*", "");
                        varString = varString.replaceFirst("\"*$", "");
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "";
                    }
                } else {
                    // Treat as a String:
                    // Strings are stored in tab files quoted;
                    // Missing values are stored as an empty string
                    // between two tabs (or one tab and the new line);
                    // Empty strings stored as "" (quoted empty string).
                    // For the purposes  of this CSV ingest reader, we are going
                    // to assume that all the empty strings in the file are
                    // indeed empty strings, and NOT missing values:
                    if (varString != null) {
                        // escape the quotes, newlines, and tabs:
                        varString = varString.replace("\"", "\\\"");
                        varString = varString.replace("\n", "\\n");
                        varString = varString.replace("\t", "\\t");
                        // final pair of quotes:
                        varString = "\"" + varString + "\"";
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "\"\"";
                    }
                }
            }
            finalOut.println(StringUtils.join(caseRow, "\t"));
        }
    }
    // Number of records the second-pass parser produced.
    long linecount = parser.getRecordNumber();
    finalOut.close();
    parser.close();
    dbglog.fine("Tmp File: " + firstPassTempFile);
    // Firstpass file is deleted to prevent tmp from filling up.
    firstPassTempFile.delete();
    // Sanity check: both passes must have seen the same number of records.
    if (dataTable.getCaseQuantity().intValue() != linecount) {
        List<String> args = Arrays
                .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount });
        throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
    }
    return (int) linecount;
}

From source file:com.cotrino.langnet.GenerateVisualization.java

/**
 * Loads the word count of every language from a CSV file into
 * {@code amountWordsPerLanguage}, replacing any previous contents.
 *
 * <p>Each record must provide a {@code Language} column and an integer {@code Words} column.
 *
 * @param file path of the CSV file to read
 * @throws IOException if the file cannot be opened or read
 */
private void getLanguages(String file) throws IOException {

    amountWordsPerLanguage = new HashMap<String, Integer>();

    // try-with-resources releases the file even if a record fails to parse
    // (e.g. a non-numeric Words value).
    try (Reader reader = new FileReader(file);
            CSVParser parser = new CSVParser(reader, csvFormat)) {
        for (CSVRecord record : parser) {
            String language = record.get("Language");
            int words = Integer.parseInt(record.get("Words"));
            amountWordsPerLanguage.put(language, words);
        }
    }

}