List of usage examples for org.apache.commons.csv CSVParser CSVParser
public CSVParser(final Reader reader, final CSVFormat format) throws IOException
If you do not read all records from the given reader, you should call #close() on the parser, unless you close the reader yourself.
From source file:edu.washington.gs.skyline.model.quantification.QuantificationTest.java
/**
 * Reads the expected rows from a CSV test resource and maps each record's
 * key to its "NormalizedArea" value (parsed as a nullable Double).
 *
 * @param filename classpath resource name, resolved relative to QuantificationTest
 * @return map from RecordKey to the parsed NormalizedArea value
 * @throws Exception on I/O or parse failure
 */
private Map<RecordKey, Double> readExpectedRows(String filename) throws Exception {
    Map<RecordKey, Double> map = new HashMap<>();
    // try-with-resources closes BOTH the parser and the reader on every path.
    // The original closed only the reader and leaked the CSVParser.
    // NOTE(review): charset is the platform default, as in the original —
    // consider StandardCharsets.UTF_8 if the resource encoding is known.
    try (Reader reader = new InputStreamReader(
                 QuantificationTest.class.getResourceAsStream(filename));
         CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        for (CSVRecord record : parser.getRecords()) {
            map.put(new RecordKey(record),
                    parseNullableDouble(record.get("NormalizedArea")));
        }
    }
    return map;
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.sequence.annotator.OnlyFilesMatchingPredictionsReader.java
public static List<Sequence> extractSequences(File tokenLevelPredictionsCsvFile1) throws IOException { List<Sequence> result = new ArrayList<>(); // load the CSV CSVParser csvParser = new CSVParser(new FileReader(tokenLevelPredictionsCsvFile1), CSVFormat.DEFAULT.withCommentMarker('#')); String prevSeqId = null;/*from w w w . j a v a 2 s . co m*/ Sequence currentSequence = new Sequence(); int tokenCounter = 0; for (CSVRecord csvRecord : csvParser) { // row for particular instance (token) String predictedTag = csvRecord.get(1); String token = csvRecord.get(2); String seqId = csvRecord.get(3); TokenEntry tokenEntry = new TokenEntry(token, predictedTag); // if the token belongs to the previous seqId, add it to the sequence if (prevSeqId == null || seqId.equals(prevSeqId)) { currentSequence.getTokens().add(tokenEntry); } else { // otherwise start a new sequence result.add(currentSequence); currentSequence = new Sequence(); currentSequence.getTokens().add(tokenEntry); } prevSeqId = seqId; tokenCounter++; } // don't forget to add the last sequence result.add(currentSequence); System.out.println("Loaded " + result.size() + " sequences with total " + tokenCounter + " tokens."); return result; }
From source file:com.sojw.TableNamesFinderExecutor.java
/**
 * Reads an entire CSV file into memory.
 *
 * @param filePath path of the CSV file to read
 * @return all records of the file, in order
 * @throws IOException if the file cannot be opened or parsed
 */
public static List<CSVRecord> getCSVFileContents(final String filePath) throws IOException {
    // try-with-resources: the original leaked both the parser and the reader.
    try (final Reader in = new BufferedReader(new FileReader(filePath));
         final CSVParser parser = new CSVParser(in, CSVFormat.DEFAULT)) {
        return parser.getRecords();
    }
}
From source file:core.reporting.ImportFromFile.java
/**
 * Validates the content of a CSV input file against the column definition file
 * ({@code columnModelDef}.csv). Valid records are stored into {@code tempBuffer};
 * every error found is written to the system log under the import flag "ie".
 *
 * @param ircdlist record list parsed from the input file
 * @return number of errors found
 */
private int validateRecord(Iterable<CSVRecord> ircdlist) {
    // clear all previous log entries for the import flag
    SystemLog.clearLogByFlag("ie");
    // column definition: one CSV row per expected field
    // (columns used below: field, mandatory, class, format, valid values, target_field)
    Iterable<CSVRecord> coldefl = null;
    try {
        Reader in = new FileReader(TResourceUtils.getFile(columnModelDef + ".csv"));
        // NOTE(review): this parser/reader pair is never closed — potential leak.
        coldefl = (new CSVParser(in, CSVFormat.EXCEL.withHeader()).getRecords());
    } catch (Exception e) {
        // definition file missing/unreadable: logged; coldefl stays null and the
        // loop below would then NPE — presumably the file is expected to exist.
        SystemLog.logException(e);
    }
    SimpleDateFormat dfomat = null;
    tempBuffer.clear();
    int line = 0;      // 1-based input line counter, used in log messages
    int error = 0;     // total error count (return value)
    int coldeflen = 0; // number of definition columns seen for the current record
    fileColumns = ""; // accumulates target field names, ';'-separated
    for (CSVRecord ircd : ircdlist) {
        Record frcd = new Record(recordModel);
        line++;
        coldeflen = 0;
        // premature return if error > coldefl
        // NOTE(review): coldeflen was just reset to 0, so ANY prior error aborts
        // here at the start of the next record — verify this is the intent.
        if (error > coldeflen) {
            SystemLog.log("inputfile.msg05", "ie", "", error);
            return error;
        }
        for (CSVRecord cdr : coldefl) {
            coldeflen++;
            Object iobj = null;   // converted field value, eventually stored in frcd
            String siobj = null;  // raw string value, used for valid-values check
            String fieldn = null; // contains the field name from the definition row
            try {
                fieldn = cdr.get("field");
                // probe: throws if the input record has no such column
                ircd.get(fieldn);
            } catch (Exception e) {
                // column absent from input: only an error if field is mandatory
                if (cdr.get("mandatory").equals("true")) {
                    SystemLog.log("inputfile.msg02", "ie", "", line, cdr.get("field"));
                    error++;
                }
                continue;
            }
            // convert the raw value according to the declared class
            try {
                String cls = cdr.get("class");
                // String class by default
                iobj = ircd.get(fieldn);
                if (cls.equals("Integer")) {
                    iobj = Integer.parseInt(ircd.get(fieldn));
                }
                if (cls.equals("Double")) {
                    iobj = Double.parseDouble(ircd.get(fieldn));
                }
                if (cls.equals("Date")) {
                    // date may be absent: empty string maps to ZERODATE sentinel
                    dfomat = new SimpleDateFormat(cdr.get("format"));
                    Date d = iobj.equals("") ? TStringUtils.ZERODATE
                            : dfomat.parse((String) iobj);
                    iobj = new java.sql.Date(d.getTime());
                }
            } catch (Exception e) {
                // conversion failed: log line, field, class and format, then skip field
                SystemLog.log("inputfile.msg03", "ie", "", line, cdr.get("field"),
                        cdr.get("class"), cdr.get("format"));
                error++;
                continue;
            }
            // valid-values check: raw value must match one of the ';'-separated
            // entries in "valid values" (empty definition means all values allowed)
            siobj = ircd.get(fieldn);
            boolean vvb = true;
            String vv = cdr.get("valid values");
            if (!vv.equals("")) {
                vvb = false;
                String[] vvlst = vv.split(";");
                for (String vvi : vvlst) {
                    vvb = (siobj.equals(vvi)) ? true : vvb;
                }
            }
            if (vvb == false) {
                SystemLog.log("inputfile.msg04", "ie", "", line, cdr.get("field"),
                        cdr.get("valid values"));
                error++;
                continue;
            }
            // no problem: record the target field name and store the converted value
            String tf = cdr.get("target_field");
            fileColumns += tf + ";";
            frcd.setFieldValue(tf, iobj);
        }
        tempBuffer.add(frcd);
    }
    // drop the trailing ';'
    // NOTE(review): throws StringIndexOutOfBoundsException if fileColumns is
    // still empty (e.g. empty input) — confirm callers guarantee non-empty input.
    fileColumns = fileColumns.substring(0, fileColumns.length() - 1);
    return error;
}
From source file:com.publictransitanalytics.scoregenerator.datalayer.directories.GTFSReadingStopTimesDirectory.java
private void parseStopTimesFile(final ImmutableSetMultimap<String, FrequencyRecord> frequencyRecordMap, final Reader stopTimesReader) throws FileNotFoundException, IOException, InterruptedException { final CSVParser parser = new CSVParser(stopTimesReader, CSVFormat.DEFAULT.withHeader()); final SortedSetMultimap<String, RawTripStop> rawTripMap = TreeMultimap.create(Comparator.naturalOrder(), (stop1, stop2) -> Integer.compare(stop1.getSequence(), stop2.getSequence())); final Iterator<CSVRecord> stopTimesIter = parser.iterator(); while (stopTimesIter.hasNext()) { final CSVRecord record = stopTimesIter.next(); final String rawTripId = record.get("trip_id"); final int stopSequence = Integer.valueOf(record.get("stop_sequence")); final String stopId = record.get("stop_id"); final String arrivalTimeString = record.get("arrival_time"); final TransitTime arrivalTime = (arrivalTimeString == null) ? null : TransitTime.parse(arrivalTimeString); final String departureTimeString = record.get("departure_time"); final TransitTime departureTime = (departureTimeString == null) ? null : TransitTime.parse(arrivalTimeString); if (frequencyRecordMap.containsKey(rawTripId)) { final RawTripStop rawTripStop = new RawTripStop(arrivalTime, departureTime, stopId, rawTripId, stopSequence);// ww w. j ava 2s . 
c o m rawTripMap.put(rawTripId, rawTripStop); } else { final TripId tripId = new TripId(rawTripId); final TripStop tripStop = new TripStop(arrivalTime, stopId, tripId, stopSequence); try { final TripIdKey tripIdKey = new TripIdKey(rawTripId); tripsStore.put(tripIdKey, tripId); tripSequenceStore.put(new TripSequenceKey(tripIdKey, arrivalTime, stopSequence), tripStop); stopTimesStore.put(StopTimeKey.getWriteKey(stopId, arrivalTime), tripStop); } catch (final BitvantageStoreException e) { throw new ScoreGeneratorFatalException(e); } } } for (final String rawTripId : rawTripMap.keySet()) { final ImmutableSet<FrequencyRecord> frequencyRecords = frequencyRecordMap.get(rawTripId); for (final FrequencyRecord frequencyRecord : frequencyRecords) { TransitTime recurringTime = frequencyRecord.getStartTime(); while (recurringTime.isBefore(frequencyRecord.getEndTime())) { final TransitTime baseArrivalTime = rawTripMap.get(rawTripId).first().getArrivalTime(); final TripId tripId = new TripId(rawTripId, recurringTime.toString()); for (final RawTripStop rawTripStop : rawTripMap.get(rawTripId)) { final TransitTime arrivalTime = recurringTime .plus(TransitTime.durationBetween(baseArrivalTime, rawTripStop.getArrivalTime())); final int stopSequence = rawTripStop.getSequence(); final String stopId = rawTripStop.getStopId(); final TripStop tripStop = new TripStop(arrivalTime, stopId, tripId, stopSequence); final TripIdKey tripIdKey = new TripIdKey(tripId.getRawTripId(), tripId.getQualifier()); try { tripsStore.put(tripIdKey, tripId); tripSequenceStore.put(new TripSequenceKey(tripIdKey, arrivalTime, stopSequence), tripStop); stopTimesStore.put(StopTimeKey.getWriteKey(stopId, arrivalTime), tripStop); } catch (final BitvantageStoreException e) { throw new ScoreGeneratorFatalException(e); } } recurringTime = recurringTime.plus(frequencyRecord.getInterval()); } } } }
From source file:com.ibm.watson.app.qaclassifier.tools.GenerateTrainingAndPopulationData.java
/** * Reads in the question input file and creates a POJO for each question it finds. If the label associated with * the question does not exist in the previously read in answer store then it is skipped * /*www . ja v a2 s . c om*/ * @return TrainingData - full POJO of the training data */ private static NLClassifierTrainingData readQuestionInput(List<ManagedAnswer> store) { NLClassifierTrainingData data = null; try (FileReader reader = new FileReader(questionInput); CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL)) { // read in the csv file and get the records List<CSVRecord> records = parser.getRecords(); // now we can create the training data because we have read the records data = new NLClassifierTrainingData(); data.setLanguage("en"); for (CSVRecord r : records) { // order is: QuestionText, LabelId // check for existence of label first, if not there, skip // we only add the training instance if there is an associated answer String text = r.get(0); String label = r.get(1); if (labelHasAnswer(label, store)) { data.addTrainingData(text, label); } else { System.out.println(MessageKey.AQWQAC24009E_label_not_found_in_answer_store_including_2 .getMessage(text, label).getFormattedMessage()); } } } catch (Exception e) { e.printStackTrace(); } return data; }
From source file:edu.washington.gs.skyline.model.quantification.QuantificationTest.java
private List<ReplicateData> readReplicates(String filename) throws Exception { Map<String, ReplicateData> replicates = new LinkedHashMap<>(); Reader reader = new InputStreamReader(QuantificationTest.class.getResourceAsStream(filename)); try {//from w ww . j av a 2s. c om CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader()); for (CSVRecord record : parser.getRecords()) { String fileName = record.get("FileName"); ReplicateData replicate = replicates.get(fileName); if (replicate == null) { replicate = new ReplicateData(); replicates.put(fileName, replicate); } } } finally { reader.close(); } throw new NotImplementedException(); }
From source file:com.itemanalysis.jmetrik.file.JmetrikFileReader.java
/**
 * Opens a connection by instantiating the reader and CSVParser. It extracts
 * the header into the variable attribute map and moves the cursor to the
 * first row of data.
 * <p>
 * The file is opened twice on purpose: CSVParser only accepts a header in
 * its constructor, so the first pass discovers the column names and the
 * second pass re-opens the file with that header installed.
 *
 * @throws IOException if the file cannot be opened or read
 */
public void openConnection() throws IOException {
    // First pass: open file (BOM-aware, UTF-8) and read the header/attributes.
    dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8");
    dataParser = new CSVParser(dataReader, CSVFormat.DEFAULT.withCommentMarker('#'));
    dataIterator = dataParser.iterator();
    setVariableAttributes(dataIterator);

    // Collect column names from the attribute map, in iteration order.
    String[] colNames = new String[variableAttributes.size()];
    int index = 0;
    for (VariableName v : variableAttributes.keySet()) {
        colNames[index] = v.toString();
        index++;
    }

    // Close the first-pass reader/parser before re-opening.
    dataReader.close();
    dataParser.close();

    // Second pass: re-open with the discovered header so records are
    // addressable by column name. (A little inefficient because it loops
    // over the header a second time — CSVReader only allows the header to
    // be set in the constructor.)
    dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8");
    dataParser = new CSVParser(dataReader,
            CSVFormat.DEFAULT.withCommentMarker('#').withHeader(colNames));
    dataIterator = dataParser.iterator();
    advanceToFirstDataRecord();
}
From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
/**
 * Creates a header map for the CSV file, imposing naming conventions on the
 * column names. When the file has a header, the real column names are used;
 * otherwise synthetic names v1, v2, ... are generated from the width of the
 * first record. Every attribute is created with DataType.INTEGER and a blank
 * label, and stored in {@code variableAttributeMap}.
 * <p>
 * Any IOException is captured in {@code theException} rather than thrown,
 * matching this class's error-reporting convention.
 */
private void setVariableAttributes() {
    VariableAttributes variableAttributes = null;
    int position = 0;
    // try-with-resources replaces the original's manual finally-close ladder;
    // both resources are closed on every path, and close failures are still
    // surfaced through the IOException catch below.
    try (Reader reader = new InputStreamReader(
                 new BOMInputStream(new FileInputStream(dataFile)), "UTF-8");
         CSVParser parser = new CSVParser(reader, dataFileFormat.withHeader())) {
        if (hasHeader) {
            // Use the parsed header names directly.
            Map<String, Integer> csvMap = parser.getHeaderMap();
            for (String s : csvMap.keySet()) {
                variableAttributes = new VariableAttributes(new VariableName(s),
                        new VariableLabel(""), DataType.INTEGER, position);
                variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                position++;
            }
        } else {
            // No header: synthesize names v1..vN from the first record's width.
            // NOTE(review): the format still has withHeader() applied, so the
            // record inspected here is the SECOND physical line — confirm this
            // matches the intended column count for headerless files.
            Iterator<CSVRecord> iter = parser.iterator();
            CSVRecord csvRecord = iter.next();
            for (int i = 0; i < csvRecord.size(); i++) {
                variableAttributes = new VariableAttributes(new VariableName("v" + (i + 1)),
                        new VariableLabel(""), DataType.INTEGER, position);
                variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                position++;
            }
        }
    } catch (IOException ex) {
        // Captured, not thrown: callers inspect theException afterwards.
        theException = ex;
    }
}
From source file:com.marklogic.contentpump.SplitDelimitedTextReader.java
/**
 * Initializes the CSV parser for one input split of a delimited-text file.
 * Configures the document builder from the split's header, opens and seeks
 * the file to the split start, resolves which column supplies the document
 * URI, and skips the first line (header for the first split, a partial line
 * for all later splits).
 * <p>
 * On a recoverable configuration problem (bad fields, unknown URI column)
 * the method logs and returns early, leaving the parser unset so the file
 * is skipped.
 */
@Override
protected void initParser(InputSplit inSplit) throws IOException, InterruptedException {
    setFile(((DelimitedSplit) inSplit).getPath());
    configFileNameAsCollection(conf, file);

    // Get the header carried inside the DelimitedSplit (computed up front).
    TextArrayWritable taw = ((DelimitedSplit) inSplit).getHeader();
    fields = taw.toStrings();
    try {
        docBuilder.configFields(conf, fields);
    } catch (IllegalArgumentException e) {
        // Unusable field configuration: skip this file entirely.
        LOG.error("Skipped file: " + file.toUri() + ", reason: " + e.getMessage());
        return;
    }
    fileIn = fs.open(file);
    lineSeparator = retrieveLineSeparator(fileIn);
    if (start != 0) {
        // In case the cut point is exactly at \n, back off 1 char so the
        // boundary line becomes a partial line and the skip below drops it.
        start--;
    }
    fileIn.seek(start);
    instream = new InputStreamReader(fileIn, encoding);
    bytesRead = 0;
    fileLen = inSplit.getLength();
    if (uriName == null) {
        generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
        if (generateId) {
            // Generated URIs are namespaced by file path + split start offset.
            idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
        } else {
            // No URI column configured: default to the first column.
            uriId = 0;
        }
    }
    // Locate the URI column by name unless an id is already determined.
    // NOTE(review): `found` starts true when uriId == 0, so the search loop
    // is skipped in that case — confirm uriId's initial value elsewhere.
    boolean found = generateId || uriId == 0;
    for (int i = 0; i < fields.length && !found; i++) {
        if (fields[i].equals(uriName)) {
            uriId = i;
            found = true;
            break;
        }
    }
    if (found == false) {
        // uriName doesn't match any column: skip this file.
        LOG.error("Skipped file: " + file.toUri() + ", reason: " + URI_ID + " " + uriName
                + " is not found");
        return;
    }
    // Keep leading and trailing whitespaces to ensure accuracy of pos;
    // do not skip empty lines just in case the split boundary is \n.
    parser = new CSVParser(instream, CSVParserFormatter.getFormat(delimiter, encapsulator, false, false));
    parserIterator = parser.iterator();
    // Skip the first line: on the 1st split it is the header; on the other
    // splits it is the partial line created by backing off `start` above.
    if (parserIterator.hasNext()) {
        String[] values = getLine();
        start += getBytesCountFromLine(values);
        pos = start;
    }
}