Usage examples for org.apache.commons.csv.CSVParser#getHeaderMap()
public Map<String, Integer> getHeaderMap()
From source file: umich.ms.batmass.filesupport.files.types.mzrt.model.MzrtFile.java
public void load() throws DataLoadingException { int[] counts = new int[3]; // [0] - \r\n, [1] - \n, [2] - \r final String[] separators = { "\r\n", "\n", "\r" }; // detecting line separator try (InputStreamReader isr = new InputStreamReader( new BufferedInputStream(new FileInputStream(file.toFile())), Charsets.UTF_8)) { int c;//from w w w.j a v a 2s . c om int encountered = 0; boolean isPrevR = false; int cutoff = 50; readLoop: while ((c = isr.read()) != -1) { char ch = (char) c; switch (ch) { case '\r': if (++counts[2] > cutoff) { break readLoop; } isPrevR = true; break; case '\n': if (isPrevR) { counts[2]--; if (++counts[0] > cutoff) { break readLoop; } } else { if (++counts[1] > cutoff) { break readLoop; } } isPrevR = false; break; default: isPrevR = false; } } } catch (IOException ex) { throw new DataLoadingException("Could not detect line separator", ex); } List<Integer> idxMax = new ArrayList<>(); for (int i = 0; i < counts.length; i++) { if (idxMax.isEmpty()) { idxMax.add(i); } else if (counts[i] > counts[idxMax.get(0)]) { idxMax.clear(); idxMax.add(i); } else if (counts[i] == counts[idxMax.get(0)]) { idxMax.add(i); } } String recordSeparator; if (idxMax.size() > 1) { if (idxMax.contains(0)) { recordSeparator = separators[0]; } else if (idxMax.contains(1)) { recordSeparator = separators[1]; } else { recordSeparator = separators[idxMax.get(0)]; } } else { recordSeparator = separators[idxMax.get(0)]; } // detecting delimiter char delimiter; try (BufferedReader br = new BufferedReader(new FileReader(file.toFile()))) { List<String> lines = new ArrayList<>(); String line; int numTestLines = 10; while ((line = br.readLine()) != null) { if (!line.isEmpty()) { lines.add(line); if (lines.size() >= numTestLines) break; } } delimiter = guessDelimiter(lines); } catch (IOException ex) { throw new DataLoadingException("Could not detect delimiter character", ex); } try (BufferedReader br = new BufferedReader(new FileReader(file.toFile()))) { CSVFormat fmt = 
CSVFormat.newFormat(delimiter); fmt = fmt.withHeader().withIgnoreEmptyLines(true).withTrim(true).withIgnoreHeaderCase(true) .withQuoteMode(QuoteMode.NON_NUMERIC).withRecordSeparator(recordSeparator).withQuote('"'); CSVParser parser = fmt.parse(br); records = parser.getRecords(); header = parser.getHeaderMap(); String[] colNames = { HEAD_MZLO, HEAD_MZHI, HEAD_RTLO, HEAD_RTHI }; for (int i = 0; i < colNames.length; i++) { Integer index = header.get(colNames[i]); if (index == null) throw new DataLoadingException(String.format("Missing header column [%s]", colNames[i])); indexesMzRtColorOpacity[i] = index; } Integer indexColor = header.get(HEAD_COLOR); if (indexColor != null && indexColor >= 0) indexesMzRtColorOpacity[4] = indexColor; Integer indexOpacity = header.get(HEAD_OPACITY); if (indexOpacity != null && indexOpacity >= 0) indexesMzRtColorOpacity[5] = indexOpacity; } catch (IOException ex) { throw new DataLoadingException(ex); } }
From source file: us.parr.animl.data.DataTable.java
public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride, String[] colNamesOverride, boolean hasHeaderRow) { try {/* w w w . j ava 2 s .com*/ // use apache commons io + csv to load but convert to list of String[] // byte-order markers are handled if present at start of file. FileInputStream fis = new FileInputStream(fileName); final Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8"); CSVFormat format; if (formatType == null) { format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180; } else { switch (formatType.toLowerCase()) { case "tsv": format = hasHeaderRow ? CSVFormat.TDF.withHeader() : CSVFormat.TDF; break; case "mysql": format = hasHeaderRow ? CSVFormat.MYSQL.withHeader() : CSVFormat.MYSQL; break; case "excel": format = hasHeaderRow ? CSVFormat.EXCEL.withHeader() : CSVFormat.EXCEL; break; case "rfc4180": default: format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180; break; } } final CSVParser parser = new CSVParser(reader, format); List<String[]> rows = new ArrayList<>(); int numHeaderNames = parser.getHeaderMap().size(); try { for (final CSVRecord record : parser) { String[] row = new String[record.size()]; for (int j = 0; j < record.size(); j++) { row[j] = record.get(j); } rows.add(row); } } finally { parser.close(); reader.close(); } VariableType[] actualTypes = computeColTypes(rows, numHeaderNames); Set<String> colNameSet = parser.getHeaderMap().keySet(); String[] colNames = colNameSet.toArray(new String[colNameSet.size()]); if (colNamesOverride != null) { colNames = colNamesOverride; } if (colTypesOverride != null) { actualTypes = colTypesOverride; } return fromStrings(rows, actualTypes, colNames, false); } catch (Exception e) { throw new IllegalArgumentException("Can't open and/or read " + fileName, e); } }