List of usage examples for org.apache.commons.csv CSVFormat newFormat
public static CSVFormat newFormat(final char delimiter)
From source file:umich.ms.batmass.filesupport.files.types.mzrt.model.MzrtFile.java
public void load() throws DataLoadingException { int[] counts = new int[3]; // [0] - \r\n, [1] - \n, [2] - \r final String[] separators = { "\r\n", "\n", "\r" }; // detecting line separator try (InputStreamReader isr = new InputStreamReader( new BufferedInputStream(new FileInputStream(file.toFile())), Charsets.UTF_8)) { int c;// w ww . jav a 2 s .c o m int encountered = 0; boolean isPrevR = false; int cutoff = 50; readLoop: while ((c = isr.read()) != -1) { char ch = (char) c; switch (ch) { case '\r': if (++counts[2] > cutoff) { break readLoop; } isPrevR = true; break; case '\n': if (isPrevR) { counts[2]--; if (++counts[0] > cutoff) { break readLoop; } } else { if (++counts[1] > cutoff) { break readLoop; } } isPrevR = false; break; default: isPrevR = false; } } } catch (IOException ex) { throw new DataLoadingException("Could not detect line separator", ex); } List<Integer> idxMax = new ArrayList<>(); for (int i = 0; i < counts.length; i++) { if (idxMax.isEmpty()) { idxMax.add(i); } else if (counts[i] > counts[idxMax.get(0)]) { idxMax.clear(); idxMax.add(i); } else if (counts[i] == counts[idxMax.get(0)]) { idxMax.add(i); } } String recordSeparator; if (idxMax.size() > 1) { if (idxMax.contains(0)) { recordSeparator = separators[0]; } else if (idxMax.contains(1)) { recordSeparator = separators[1]; } else { recordSeparator = separators[idxMax.get(0)]; } } else { recordSeparator = separators[idxMax.get(0)]; } // detecting delimiter char delimiter; try (BufferedReader br = new BufferedReader(new FileReader(file.toFile()))) { List<String> lines = new ArrayList<>(); String line; int numTestLines = 10; while ((line = br.readLine()) != null) { if (!line.isEmpty()) { lines.add(line); if (lines.size() >= numTestLines) break; } } delimiter = guessDelimiter(lines); } catch (IOException ex) { throw new DataLoadingException("Could not detect delimiter character", ex); } try (BufferedReader br = new BufferedReader(new FileReader(file.toFile()))) { CSVFormat fmt = CSVFormat.newFormat(delimiter); fmt = fmt.withHeader().withIgnoreEmptyLines(true).withTrim(true).withIgnoreHeaderCase(true) .withQuoteMode(QuoteMode.NON_NUMERIC).withRecordSeparator(recordSeparator).withQuote('"'); CSVParser parser = fmt.parse(br); records = parser.getRecords(); header = parser.getHeaderMap(); String[] colNames = { HEAD_MZLO, HEAD_MZHI, HEAD_RTLO, HEAD_RTHI }; for (int i = 0; i < colNames.length; i++) { Integer index = header.get(colNames[i]); if (index == null) throw new DataLoadingException(String.format("Missing header column [%s]", colNames[i])); indexesMzRtColorOpacity[i] = index; } Integer indexColor = header.get(HEAD_COLOR); if (indexColor != null && indexColor >= 0) indexesMzRtColorOpacity[4] = indexColor; Integer indexOpacity = header.get(HEAD_OPACITY); if (indexOpacity != null && indexOpacity >= 0) indexesMzRtColorOpacity[5] = indexOpacity; } catch (IOException ex) { throw new DataLoadingException(ex); } }
From source file:umich.ms.batmass.filesupport.files.types.xcms.peaks.model.XCMSPeaks.java
/** * Parse XCMS peaks from the file, which you can create from R after running * XCMS feature finding. <br/>// www . j a v a 2s. com * Example:<br/> * {@code xs <- xcmsSet(files = file_mzml, method = "massifquant", prefilter = c(1, 10000), peakwidth = c(5, 500), ppm = 20, criticalValue = 1.0, consecMissedLimit = 0, withWave = 0, nSlaves=4)} <br/> * {@code peaks <- ixs@peaks[1:7108,1:9]} <br/> * {@code write.table(peaks, sep = "\t",file = "D:/projects/XCMS/peaks.xcms.csv")} * @param path * @return */ public static XCMSPeaks create(Path path) throws IOException { if (!Files.exists(path)) throw new IllegalArgumentException("File path for XCMS peaks does not exist."); XCMSPeaks peaks = new XCMSPeaks(); BufferedReader reader = new BufferedReader(new FileReader(path.toFile())); String[] header = {}; CSVFormat format = CSVFormat.newFormat(',').withHeader().withIgnoreSurroundingSpaces() .withAllowMissingColumnNames().withQuoteMode(QuoteMode.NON_NUMERIC).withQuote('"'); CSVParser parser = new CSVParser(reader, format); String val; for (final CSVRecord r : parser) { XCMSPeak p = new XCMSPeak(); val = r.get(0); p.setRowNum(Integer.parseInt(val)); val = r.get("mz"); p.setMz(Double.parseDouble(val)); val = r.get("mzmin"); p.setMzMin(Double.parseDouble(val)); val = r.get("mzmax"); p.setMzMax(Double.parseDouble(val)); val = r.get("rt"); p.setRt(Double.parseDouble(val)); val = r.get("rtmin"); p.setRtMin(Double.parseDouble(val)); val = r.get("rtmax"); p.setRtMax(Double.parseDouble(val)); val = r.get("into"); p.setInto(Double.parseDouble(val)); val = r.get("maxo"); p.setMaxo(Double.parseDouble(val)); val = r.get("sample"); p.setSample(val); // these are optional and are only added by further R package // called 'CAMERA' processing try { val = getRecordValueForColName(r, "isotopes"); p.setIsotopes(val); } catch (IllegalArgumentException e) { } try { val = r.get("adduct"); p.setAdduct(val); } catch (IllegalArgumentException e) { p.setAdduct(""); } try { val = r.get("pcgroup"); p.setPcgroup(Integer.parseInt(val)); } catch (IllegalArgumentException e) { } peaks.add(p); } return peaks; }
From source file:utils.VocabularyFromFiles.java
public void parseWordInputCSV(String wordFileLocation, String bandColumnName, String wordColumnName, String ratingLabels, String correctResponse) throws IOException { File inputFileWords = new File(wordFileLocation); final Reader reader = new InputStreamReader(inputFileWords.toURL().openStream(), "UTF-8"); // todo: this might need to change to "ISO-8859-1" depending on the usage Iterable<CSVRecord> records = CSVFormat.newFormat(';').withHeader().parse(reader); for (CSVRecord record : records) { int bandNumber = Integer.parseInt(record.get(bandColumnName)); String spelling = record.get(wordColumnName); String stimulusString = bandNumber + ";" + spelling; localWORDS.add(stimulusString);//from ww w.j a v a 2s. com } }
From source file:utils.VocabularyFromFiles.java
public void parseNonwordInputCSV(String nonwordFileLocation, String nonwordColumnName, String ratingLabels, String correctResponse) throws IOException { final File inputFileNonWords = new File(nonwordFileLocation); final Reader reader = new InputStreamReader(inputFileNonWords.toURL().openStream(), "UTF-8"); // todo: this might need to change to "ISO-8859-1" depending on the usage Iterable<CSVRecord> records = CSVFormat.newFormat(';').withHeader().parse(reader); for (CSVRecord record : records) { String spelling = record.get(nonwordColumnName); localNONWORDS.add(spelling);//from w ww . j a v a 2s .c o m } }