List of usage examples for the org.apache.commons.csv.CSVParser constructor CSVParser(Reader, CSVFormat)
public CSVParser(final Reader reader, final CSVFormat format) throws IOException
If you do not read all records from the given reader, you should call close() on the parser, unless you close the reader yourself.
From source file:GUI.ReadFile.java
public List<Phase> readPhase(String fileName) { FileReader fileReader;/*from www . j a v a2s .com*/ CSVParser csvFileParser; CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(PHASE_HEADER_MAPPING); List<Phase> phase_list = new ArrayList<>(); try { fileReader = new FileReader(fileName); //initialize CSVParser object csvFileParser = new CSVParser(fileReader, csvFileFormat); //Get a list of CSV file records List<CSVRecord> csvRecords = csvFileParser.getRecords(); //Read the CSV file records starting from the second record to skip the header for (int i = 1; i < csvRecords.size(); i++) { CSVRecord record = csvRecords.get(i); Phase p = new Phase(record.get(st_time), record.get(end_time), record.get(category)); phase_list.add(p); } fileReader.close(); csvFileParser.close(); System.out.println(fileName + " Phase file read!"); } catch (FileNotFoundException e) { System.out.println(fileName + " Phase file missing ..."); return null; } catch (IOException ex) { System.out.println(fileName + " csv file error !!!"); return null; } catch (ParseException ex) { System.out.println(fileName + " phase parsing error !!!"); return null; } return phase_list; }
From source file:com.cotrino.langnet.GenerateVisualization.java
private void generateLanguages(String summaryFile, String languagesFile) throws IOException { HashMap<String, Integer> list = new HashMap<String, Integer>(); Reader reader = new FileReader(summaryFile); CSVParser parser = new CSVParser(reader, csvFormat); for (CSVRecord record : parser) { String languageA = record.get("LanguageA"); String languageB = record.get("LanguageB"); int words = Integer.parseInt(record.get("ExecutedComparisons")); list.put(languageA, Math.max(words, list.getOrDefault(languageA, 0))); list.put(languageB, Math.max(words, list.getOrDefault(languageB, 0))); }//from w w w.ja va 2 s . co m parser.close(); reader.close(); String content = "Language;Words;Family;\n"; for (String language : list.keySet()) { content += language + ";" + list.get(language) + ";Romance;\n"; } IOUtil.write(languagesFile, content); }
From source file:com.edu.duke.URLResource.java
/**
 * Creates a <code>CSVParser</code> over the contents of this web page, using a
 * caller-supplied field delimiter and an optional header row.
 *
 * Every line of the page is expected to hold fields separated by the given delimiter.
 * When <code>withHeader</code> is true the first row supplies the column names;
 * otherwise all rows are treated as data. Use this when the data is separated by a
 * character other than a comma.
 *
 * @param withHeader true to treat the first row as a header row
 * @param delimiter a one-character string that separates fields
 * @return a <code>CSVParser</code> giving access to the page's records one at a time
 * @throws ResourceException if the delimiter is not exactly one character, or the
 *         parser cannot be created
 */
public CSVParser getCSVParser(boolean withHeader, String delimiter) {
    final boolean isSingleCharacter = delimiter != null && delimiter.length() == 1;
    if (!isSingleCharacter) {
        throw new ResourceException("URLResource: CSV delimiter must be a single character: " + delimiter);
    }

    try {
        final char separator = delimiter.charAt(0);
        final CSVFormat format = withHeader
                ? CSVFormat.EXCEL.withHeader().withDelimiter(separator)
                : CSVFormat.EXCEL.withDelimiter(separator);
        return new CSVParser(new StringReader(mySource), format);
    } catch (Exception e) {
        throw new ResourceException("URLResource: cannot read " + myPath + " as a CSV file.");
    }
}
From source file:acmi.l2.clientmod.xdat.XdatEditor.java
private void loadSchema() { String versionsFilePath = "/versions.csv"; try (CSVParser parser = new CSVParser( new InputStreamReader(getClass().getResourceAsStream(versionsFilePath)), CSVFormat.DEFAULT)) { for (CSVRecord record : parser.getRecords()) { String name = record.get(0); String className = record.get(1); controller.registerVersion(name, className); }// www. ja v a 2s.com } catch (Exception e) { log.log(Level.WARNING, versionsFilePath + " read error", e); Dialogs.show(Alert.AlertType.WARNING, e.getClass().getSimpleName(), null, e.getMessage()); } }
From source file:edu.washington.gs.skyline.model.quantification.QuantificationTest.java
private List<InputRecord> readInputRecords(String filename) throws Exception { List<InputRecord> list = new ArrayList<>(); Reader reader = new InputStreamReader(QuantificationTest.class.getResourceAsStream(filename)); try {// ww w . j a va 2 s.c o m CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader()); for (CSVRecord record : parser.getRecords()) { list.add(new InputRecord(record)); } } finally { reader.close(); } return list; }
From source file:com.publictransitanalytics.scoregenerator.datalayer.directories.GTFSReadingStopTimesDirectory.java
/**
 * Reads a frequencies CSV (first row is the header) and adds one {@code FrequencyRecord}
 * per data row to the builder, keyed by the row's {@code trip_id}.
 *
 * The reader is not closed here; it remains the caller's responsibility.
 *
 * @param builder           multimap builder that receives the parsed records
 * @param frequenciesReader source of the CSV content
 * @throws IOException if the content cannot be read
 */
private void parseFrequenciesFile(final ImmutableMultimap.Builder<String, FrequencyRecord> builder,
        final Reader frequenciesReader) throws FileNotFoundException, IOException {
    final CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader();
    final CSVParser parser = new CSVParser(frequenciesReader, headerFormat);

    for (final CSVRecord row : parser.getRecords()) {
        final String tripId = row.get("trip_id");
        builder.put(tripId,
                new FrequencyRecord(
                        tripId,
                        TransitTime.parse(row.get("start_time")),
                        TransitTime.parse(row.get("end_time")),
                        Duration.ofSeconds(Long.parseLong(row.get("headway_secs")))));
    }
}
From source file:com.publictransitanalytics.scoregenerator.datalayer.directories.GTFSReadingServiceTypeCalendar.java
/**
 * Applies the rows of a calendar-dates CSV (first row is the header) to the service map.
 *
 * For each row, exception type {@code "1"} adds the row's {@code service_id} to the row's
 * date and exception type {@code "2"} removes it. Dates are parsed with
 * {@code DateTimeFormatter.BASIC_ISO_DATE}. The reader is not closed here.
 *
 * @param calendarDatesReader source of the CSV content
 * @param serviceTypesMap     map from date to service ids, updated in place
 * @throws IOException if the content cannot be read
 * @throws ScoreGeneratorFatalException if a row has an exception type other than 1 or 2
 */
private void parseCalendarDatesFile(final Reader calendarDatesReader,
        final Multimap<LocalDate, String> serviceTypesMap) throws FileNotFoundException, IOException {
    final CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader();
    final CSVParser parser = new CSVParser(calendarDatesReader, headerFormat);

    for (final CSVRecord row : parser.getRecords()) {
        final String serviceId = row.get("service_id");
        final LocalDate day = LocalDate.parse(row.get("date"), DateTimeFormatter.BASIC_ISO_DATE);
        final String exceptionType = row.get("exception_type");

        if (exceptionType.equals("1")) {
            serviceTypesMap.put(day, serviceId);
        } else if (exceptionType.equals("2")) {
            serviceTypesMap.remove(day, serviceId);
        } else {
            throw new ScoreGeneratorFatalException(String.format("Invalid exception type %s", exceptionType));
        }
    }
}
From source file:ca.nrc.cadc.tap.db.AsciiTableData.java
/** * Constructor./* w ww . j a v a 2s. c o m*/ * * @param in The data stream * @param contentType The content type of the data * @throws IOException If a data handling error occurs */ public AsciiTableData(InputStream in, String contentType) throws IOException { char delimiter = ','; if (contentType.equals(TableContentHandler.CONTENT_TYPE_TSV)) { delimiter = '\t'; } InputStreamReader ir = new InputStreamReader(in); if (TableContentHandler.CONTENT_TYPE_TSV.equals(contentType)) { this.reader = new CSVParser(ir, CSVFormat.TDF.withFirstRecordAsHeader()); } else if (TableContentHandler.CONTENT_TYPE_CSV.equals(contentType)) { this.reader = new CSVParser(ir, CSVFormat.DEFAULT.withFirstRecordAsHeader()); } else { throw new UnsupportedOperationException("contentType: " + contentType); } this.rowIterator = reader.iterator(); Map<String, Integer> header = reader.getHeaderMap(); columnNames = new ArrayList<String>(header.size()); for (String s : header.keySet()) { columnNames.add(s.trim()); log.debug("found column: " + s); } if (columnNames.isEmpty()) { throw new IllegalArgumentException("No data columns."); } }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java
/**
 * Ingests a CSV stream in two passes and writes it out as tab-separated rows.
 *
 * <p>Setup: the stream is parsed with {@code inFormat.withHeader()}; one
 * {@code DataVariable} is created per header column (initially typed character/discrete)
 * and attached to {@code dataTable}.
 *
 * <p>First pass: every record is checked for consistency with the header, each column is
 * classified as numeric / integer / time / date by examining every value, and the header
 * plus all records are copied into a temporary {@code firstpass-*.csv} file. Afterwards
 * the case quantity is set on {@code dataTable}, the parser and {@code csvReader} are
 * closed, and the variables are re-typed according to the classification.
 *
 * <p>Second pass: the temporary file is re-read and each record is written to
 * {@code finalOut} as one tab-separated line, formatted per column type.
 * {@code finalOut} is closed and the temporary file deleted at the end.
 *
 * @param csvReader source of the CSV content; closed by this method after the first pass
 * @param dataTable table that receives the variables, variable count and case count
 * @param finalOut  destination of the tab-separated output; closed by this method
 * @return the number of records read in the second pass
 * @throws IOException if a header name is null or empty, a record's size does not match
 *         the header, the two passes disagree on the record count, or an I/O error occurs
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut)
        throws IOException {
    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    Map<String, Integer> headers = parser.getHeaderMap();
    // i doubles as the file-order counter here and as the column index in later loops.
    int i = 0;
    for (String varName : headers.keySet()) {
        if (varName == null || varName.isEmpty()) {
            // TODO:
            // Add a sensible variable name validation algorithm.
            // -- L.A. 4.0 alpha 1
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
        }
        DataVariable dv = new DataVariable();
        dv.setName(varName);
        dv.setLabel(varName);
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:NOTCALCULATED");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);
        // Every variable starts out as character/discrete; numeric columns are
        // re-typed after the first pass.
        dv.setTypeCharacter();
        dv.setIntervalDiscrete();
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
        i++;
    }
    dataTable.setVarQuantity((long) variableList.size());
    dataTable.setDataVariables(variableList);
    boolean[] isNumericVariable = new boolean[headers.size()];
    boolean[] isIntegerVariable = new boolean[headers.size()];
    boolean[] isTimeVariable = new boolean[headers.size()];
    boolean[] isDateVariable = new boolean[headers.size()];
    for (i = 0; i < headers.size(); i++) {
        // OK, let's assume that every variable is numeric;
        // but we'll go through the file and examine every value; the
        // moment we find a value that's not a legit numeric one, we'll
        // assume that it is in fact a String.
        isNumericVariable[i] = true;
        isIntegerVariable[i] = true;
        isDateVariable[i] = true;
        isTimeVariable[i] = true;
    }
    // First, "learning" pass.
    // (we'll save the incoming stream in another temp file:)
    // Per-column format that first matched; null until a value in that column parses.
    SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
    SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];
    File firstPassTempFile = File.createTempFile("firstpass-", ".csv");
    try (CSVPrinter csvFilePrinter = new CSVPrinter(
            // TODO allow other parsers of tabular data to use this parser by changing inFormat
            new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) {
        //Write headers
        csvFilePrinter.printRecord(headers.keySet());
        for (CSVRecord record : parser.getRecords()) {
            // Checks if #records = #columns in header
            if (!record.isConsistent()) {
                // NOTE(review): getRecords() has already read the whole input at this
                // point, so this line number likely refers to the end of the file rather
                // than the offending record - confirm.
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }
            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                // The column stays "integer" only while every value is non-null and is
                // either empty, the literal "null", or a first character found in
                // firstNumCharSet (defined elsewhere) followed by a remainder that
                // StringUtils.isNumeric accepts.
                isIntegerVariable[i] = isIntegerVariable[i] && varString != null
                        && (varString.isEmpty() || varString.equals("null")
                                || (firstNumCharSet.contains(varString.charAt(0))
                                        && StringUtils.isNumeric(varString.substring(1))));
                if (isNumericVariable[i]) {
                    // If variable might be "numeric" test to see if this value is a parsable number:
                    if (varString != null && !varString.isEmpty()) {
                        // NOTE(review): isNumeric and isInteger are never read in this method.
                        boolean isNumeric = false;
                        boolean isInteger = false;
                        if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                            // Special tokens count as numeric; move on to the next column.
                            continue;
                        } else {
                            try {
                                Double testDoubleValue = new Double(varString);
                                // Parsed as a double: column stays numeric, next column.
                                continue;
                            } catch (NumberFormatException ex) {
                                // the token failed to parse as a double
                                // so the column is a string variable.
                            }
                        }
                        isNumericVariable[i] = false;
                    }
                }
                // If this is not a numeric column, see if it is a date column
                // by parsing the cell as a date or date-time value:
                if (!isNumericVariable[i]) {
                    Date dateResult = null;
                    if (isTimeVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isTime = false;
                            if (selectedDateTimeFormat[i] != null) {
                                // A format was already chosen for this column: later values
                                // must parse completely with that same format.
                                ParsePosition pos = new ParsePosition(0);
                                dateResult = selectedDateTimeFormat[i].parse(varString, pos);
                                if (dateResult != null && pos.getIndex() == varString.length()) {
                                    // OK, successfully parsed a value!
                                    isTime = true;
                                }
                            } else {
                                // No format chosen yet: take the first entry of TIME_FORMATS
                                // that consumes the entire value.
                                for (SimpleDateFormat format : TIME_FORMATS) {
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = format.parse(varString, pos);
                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                        selectedDateTimeFormat[i] = format;
                                        break;
                                    }
                                }
                            }
                            if (!isTime) {
                                isTimeVariable[i] = false;
                                // if the token didn't parse as a time value,
                                // we will still try to parse it as a date, below.
                                // unless this column is NOT a date.
                            } else {
                                // And if it is a time value, we are going to assume it's
                                // NOT a date.
                                isDateVariable[i] = false;
                            }
                        }
                    }
                    if (isDateVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isDate = false;
                            // TODO:
                            // Strictly speaking, we should be doing the same thing
                            // here as with the time formats above; select the
                            // first one that works, then insist that all the
                            // other values in this column match it... but we
                            // only have one, as of now, so it should be ok.
                            // -- L.A. 4.0 beta
                            for (SimpleDateFormat format : DATE_FORMATS) {
                                // Strict parsing - it will throw an
                                // exception if it doesn't parse!
                                // NOTE(review): this mutates the DATE_FORMATS entries
                                // themselves; presumably they are shared - confirm.
                                format.setLenient(false);
                                try {
                                    format.parse(varString);
                                    isDate = true;
                                    selectedDateFormat[i] = format;
                                    break;
                                } catch (ParseException ex) {
                                    //Do nothing
                                }
                            }
                            // One non-empty value that matches no date format ends the
                            // column's date candidacy.
                            isDateVariable[i] = isDate;
                        }
                    }
                }
            }
            // Copy the record to the temp file for the second pass.
            csvFilePrinter.printRecord(record);
        }
    }
    dataTable.setCaseQuantity(parser.getRecordNumber());
    parser.close();
    csvReader.close();
    // Re-type the variables that we've determined are numerics:
    for (i = 0; i < headers.size(); i++) {
        if (isNumericVariable[i]) {
            dataTable.getDataVariables().get(i).setTypeNumeric();
            if (isIntegerVariable[i]) {
                dataTable.getDataVariables().get(i).setIntervalDiscrete();
            } else {
                dataTable.getDataVariables().get(i).setIntervalContinuous();
            }
        } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
            // Dates are still Strings, i.e., they are "character" and "discrete";
            // But we add special format values for them:
            // (the recorded pattern is always DATE_FORMATS[0], not selectedDateFormat[i])
            dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("date");
        } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
            // Same for time values:
            dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("time");
        }
    }
    // Second, final pass.
    try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) {
        parser = new CSVParser(secondPassReader, inFormat.withHeader());
        // Reused for every record; each cell is overwritten before the row is printed.
        String[] caseRow = new String[headers.size()];
        for (CSVRecord record : parser) {
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }
            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                if (isNumericVariable[i]) {
                    if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                        // Missing value - represented as an empty string in
                        // the final tab file
                        caseRow[i] = "";
                    } else if (varString.equalsIgnoreCase("NaN")) {
                        // "Not a Number" special value:
                        caseRow[i] = "NaN";
                    } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                        // Positive infinity:
                        caseRow[i] = "Inf";
                    } else if (varString.equalsIgnoreCase("-Inf")) {
                        // Negative infinity:
                        caseRow[i] = "-Inf";
                    } else if (varString.equalsIgnoreCase("null")) {
                        // By request from Gus - "NULL" is recognized as a
                        // numeric zero:
                        caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                    } else {
                        /* No re-formatting is done on any other numeric values.
                         * We'll save them as they were, for archival purposes.
                         * The alternative solution - formatting in sci. notation
                         * is commented-out below.
                         */
                        caseRow[i] = varString;
                        /*
                        if (isIntegerVariable[i]) {
                            try {
                                Integer testIntegerValue = new Integer(varString);
                                caseRow[i] = testIntegerValue.toString();
                            } catch (NumberFormatException ex) {
                                throw new IOException("Failed to parse a value recognized as an integer in the first pass! (?)");
                            }
                        } else {
                            try {
                                Double testDoubleValue = new Double(varString);
                                if (testDoubleValue.equals(0.0)) {
                                    caseRow[i] = "0.0";
                                } else {
                                    // One possible implementation:
                                    //
                                    // Round our fractional values to 15 digits
                                    // (minimum number of digits of precision guaranteed by
                                    // type Double) and format the resulting representations
                                    // in a IEEE 754-like "scientific notation" - for ex.,
                                    // 753.24 will be encoded as 7.5324e2
                                    BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext);
                                    caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal);
                                    // Strip meaningless zeros and extra + signs:
                                    caseRow[i] = caseRow[i].replaceFirst("00*e", "e");
                                    caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e");
                                    caseRow[i] = caseRow[i].replaceFirst("e\\+00", "");
                                    caseRow[i] = caseRow[i].replaceFirst("^\\+", "");
                                }
                            } catch (NumberFormatException ex) {
                                throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)");
                            }
                        }
                        */
                    }
                } else if (isTimeVariable[i] || isDateVariable[i]) {
                    // Time and Dates are stored NOT quoted (don't ask).
                    if (varString != null) {
                        // Dealing with quotes:
                        // remove the leading and trailing quotes, if present:
                        varString = varString.replaceFirst("^\"*", "");
                        varString = varString.replaceFirst("\"*$", "");
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "";
                    }
                } else {
                    // Treat as a String:
                    // Strings are stored in tab files quoted;
                    // Missing values are stored as an empty string
                    // between two tabs (or one tab and the new line);
                    // Empty strings stored as "" (quoted empty string).
                    // For the purposes of this CSV ingest reader, we are going
                    // to assume that all the empty strings in the file are
                    // indeed empty strings, and NOT missing values:
                    if (varString != null) {
                        // escape the quotes, newlines, and tabs:
                        varString = varString.replace("\"", "\\\"");
                        varString = varString.replace("\n", "\\n");
                        varString = varString.replace("\t", "\\t");
                        // final pair of quotes:
                        varString = "\"" + varString + "\"";
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "\"\"";
                    }
                }
            }
            finalOut.println(StringUtils.join(caseRow, "\t"));
        }
    }
    long linecount = parser.getRecordNumber();
    finalOut.close();
    parser.close();
    dbglog.fine("Tmp File: " + firstPassTempFile);
    // Firstpass file is deleted to prevent tmp from filling up.
    // NOTE(review): there is no finally block, so the temp file is left behind if an
    // exception is thrown anywhere above this line.
    firstPassTempFile.delete();
    // Sanity check: both passes must have seen the same number of records.
    if (dataTable.getCaseQuantity().intValue() != linecount) {
        List<String> args = Arrays
                .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount });
        throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
    }
    return (int) linecount;
}
From source file:com.cotrino.langnet.GenerateVisualization.java
private void getLanguages(String file) throws IOException { amountWordsPerLanguage = new HashMap<String, Integer>(); Reader reader = new FileReader(file); CSVParser parser = new CSVParser(reader, csvFormat); for (CSVRecord record : parser) { String language = record.get("Language"); int words = Integer.parseInt(record.get("Words")); amountWordsPerLanguage.put(language, words); }// ww w . ja v a 2s .co m parser.close(); reader.close(); }