List of usage examples for org.apache.commons.csv CSVParser getRecords
public List<CSVRecord> getRecords() throws IOException

Parses the remaining CSV input and returns the content as a list of CSVRecords. The call consumes the rest of the input, so the parser should be closed afterwards.
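Before the examples from real projects, a minimal self-contained sketch of the typical call pattern (the file name "data.csv" and the "name" column are illustrative placeholders, not taken from any project below):

import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class GetRecordsExample {
    public static void main(String[] args) throws IOException {
        // "data.csv" and the "name" column are placeholders.
        try (Reader reader = Files.newBufferedReader(Paths.get("data.csv"));
                CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withFirstRecordAsHeader())) {
            // getRecords() reads the rest of the input into memory in one call
            List<CSVRecord> records = parser.getRecords();
            for (CSVRecord record : records) {
                System.out.println(record.get("name"));
            }
        }
    }
}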
From source file:javalibs.CSVDataNormalizer.java
private void readCSV() {
    try {
        CSVParser parser = new CSVParser(
                Files.newBufferedReader(Paths.get(this.csvPath)),
                CSVFormat.DEFAULT.withHeader().withIgnoreHeaderCase().withTrim());
        // Get all headers in the CSV file so they can be used later when writing the file
        this.headerMap = parser.getHeaderMap();
        // Add the records to the list for later use
        this.allRecords = parser.getRecords();
        parser.close();
        reverseHeaderMap();
    } catch (IOException e) {
        log_.die(e);
    }
}
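The same read can be expressed with try-with-resources so the parser is closed even when getRecords() throws; a sketch assuming the same csvPath, headerMap, and allRecords members as above:

// Sketch only: csvPath, headerMap, allRecords and log_ are the members used above.
try (CSVParser parser = new CSVParser(
        Files.newBufferedReader(Paths.get(this.csvPath)),
        CSVFormat.DEFAULT.withHeader().withIgnoreHeaderCase().withTrim())) {
    this.headerMap = parser.getHeaderMap();
    this.allRecords = parser.getRecords();
} catch (IOException e) {
    log_.die(e);
}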
From source file:net.sourceforge.ganttproject.io.GanttCSVOpen.java
/**
 * Create tasks from file.
 *
 * @throws IOException on parse error or input read-failure
 */
public boolean load() throws IOException {
    CSVParser parser = new CSVParser(myInputSupplier.get(),
            CSVFormat.DEFAULT.withEmptyLinesIgnored(false).withSurroundingSpacesIgnored(true));
    int numGroup = 0;
    RecordGroup currentGroup = null;
    boolean searchHeader = true;
    List<CSVRecord> records = parser.getRecords();
    for (CSVRecord record : records) {
        if (record.size() == 0) {
            // If the line is empty then the current record group is probably finished.
            // Let's search for the next group header.
            searchHeader = true;
            continue;
        }
        if (searchHeader) {
            // Record is not empty and we're searching for a header.
            if (numGroup < myRecordGroups.size() && myRecordGroups.get(numGroup).isHeader(record)) {
                // If the next group acknowledges the header, then we give it the turn;
                // otherwise it was just an empty line in the current group.
                searchHeader = false;
                currentGroup = myRecordGroups.get(numGroup);
                parser.readHeader(record);
                currentGroup.setHeader(Lists.newArrayList(record.iterator()));
                numGroup++;
                continue;
            }
            searchHeader = false;
        }
        assert currentGroup != null;
        currentGroup.process(record);
    }
    for (RecordGroup group : myRecordGroups) {
        group.postProcess();
    }
    // Succeeded
    return true;
}
From source file:com.siemens.sw360.portal.portlets.admin.UserPortlet.java
private List<UserCSV> getUsersFromRequest(PortletRequest request, String fileUploadFormId)
        throws IOException, TException {
    final UploadPortletRequest uploadPortletRequest = PortalUtil.getUploadPortletRequest(request);
    final InputStream stream = uploadPortletRequest.getFileAsStream(fileUploadFormId);
    Reader reader = new InputStreamReader(stream);
    CSVFormat format = CommonUtils.sw360CsvFormat;
    CSVParser parser = new CSVParser(reader, format);
    List<CSVRecord> records = parser.getRecords();
    if (records.size() > 0) {
        records.remove(0); // Remove header
    }
    return getUsersFromCSV(records);
}
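Rather than calling records.remove(0) to drop the header, the format itself can skip it; a sketch assuming CSVFormat.DEFAULT is an acceptable stand-in for sw360CsvFormat:

// Sketch: withFirstRecordAsHeader() consumes the first record as the header,
// so getRecords() returns data rows only and no manual remove(0) is needed.
CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withFirstRecordAsHeader());
List<CSVRecord> records = parser.getRecords();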
From source file:com.ibm.watson.developer_cloud.professor_languo.model.stack_exchange.CorpusBuilderTest.java
private void deserialiezd_duplicate_threads_should_match_original_duplicate_threads() throws IngestionException {
    String csvFilePath = dupCorpusBuilder.getDupThreadDirPath()
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_NAME
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_EXTENSION;
    File csvData = new File(csvFilePath);
    CSVParser parser;
    List<CSVRecord> records;
    try {
        parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.TDF.withHeader());
        records = parser.getRecords();
    } catch (IOException e) {
        throw new IngestionException(e);
    }
    Set<StackExchangeThread> dupThreadSet = dupCorpusBuilder.getDupThreadSetFromBinFiles();
    for (StackExchangeThread thread : dupThreadSet) {
        String binfileName = dupCorpusBuilder.getDupThreadDirPath() + thread.getId()
                + StackExchangeConstants.BIN_FILE_SUFFIX;
        CSVRecord matchRecord = null;
        for (CSVRecord record : records) {
            if (Integer.parseInt(record.get(0)) == thread.getId()) {
                matchRecord = record;
                break;
            }
        }
        assertTrue(matchRecord != null);
        // TODO haven't checked the originId yet since it requires the new
        // method to get origin id from
        String deserTitle = matchRecord.get(1), deserBody = matchRecord.get(2),
                deserFileName = matchRecord.get(4), deserTags = matchRecord.get(5);
        assertEquals(deserTitle, thread.getQuestion().getTitle());
        assertEquals(deserBody, thread.getQuestion().getBody());
        assertEquals(deserFileName, binfileName);
        assertEquals(deserTags, thread.getConcatenatedTagsText());
    }
}
From source file:com.teamnx.util.CSVToDateBase.java
/**
 * @param fileName the CSV file to read
 * @param type     which entity type the file contains
 * @return a list of the parsed entities, or null on failure
 */
public List readCsvFile(String fileName, int type) {
    FileReader fileReader = null;
    CSVParser csvFileParser = null;
    List list = null;
    // CSVFormat with header mapping
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(file_header);
    try {
        // Initialize the FileReader object
        fileReader = new FileReader(fileName);
        // Initialize the CSVParser object
        csvFileParser = new CSVParser(fileReader, csvFileFormat);
        // Get the list of CSV records
        List<CSVRecord> csvRecords = csvFileParser.getRecords();
        // Map the CSV records to entities, skipping the header row (index 0)
        switch (type) {
        case USER:
            List<User> userList = new ArrayList<User>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                User user = new User();
                user.setId(record.get("id"));
                user.setName(record.get("name"));
                user.setPassword(record.get("password"));
                user.setDepartment_id(Integer.parseInt(record.get("department_id")));
                user.setCharacter(Integer.parseInt(record.get("character")));
                user.setClass_id(record.get("class_id"));
                user.setDepartment_name(record.get("department_name"));
                userList.add(user);
            }
            list = userList;
            break;
        case DEPARTMENT:
            List<Department> departmentList = new ArrayList<Department>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                Department department = new Department();
                department.setId(Integer.parseInt(record.get("id")));
                department.setName(record.get("name"));
                departmentList.add(department);
            }
            list = departmentList;
            break;
        case COURSE:
            List<Course> courseList = new ArrayList<Course>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                Course course = new Course();
                course.setId(record.get("id"));
                course.setName(record.get("name"));
                course.setDepartment_id(Integer.parseInt(record.get("department_id")));
                course.setStart_time(Integer.parseInt(record.get("start_time")));
                course.setEnd_time(Integer.parseInt(record.get("end_time")));
                course.setPosition(record.get("position"));
                course.setSchedule(record.get("schedule"));
                course.setYear(Integer.parseInt(record.get("year")));
                course.setSemester(Integer.parseInt(record.get("semester")));
                int j = Integer.parseInt(record.get("category"));
                course.setCategory(j == 1);
                course.setMax_member(Integer.parseInt(record.get("max_member")));
                courseList.add(course);
            }
            list = courseList;
            break;
        case STUDENT_COURSE:
            List<StudentCourse> studentCourseList = new ArrayList<StudentCourse>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                StudentCourse studentCourse = new StudentCourse();
                studentCourse.setId(record.get("id"));
                studentCourse.setCourseId(record.get("course_id"));
                studentCourse.setStudentId(record.get("student_id"));
                studentCourseList.add(studentCourse);
            }
            list = studentCourseList;
            break;
        case TEACHER_COURSE:
            List<TeacherCourse> teacherCourseList = new ArrayList<TeacherCourse>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                TeacherCourse teacherCourse = new TeacherCourse();
                teacherCourse.setId(record.get("id"));
                teacherCourse.setTeacherId(record.get("teacher_id"));
                teacherCourse.setCourseId(record.get("course_id"));
                teacherCourseList.add(teacherCourse);
            }
            list = teacherCourseList;
            break;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close quietly; guard against nulls in case construction failed.
        try {
            if (fileReader != null) {
                fileReader.close();
            }
            if (csvFileParser != null) {
                csvFileParser.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    // Returning from inside a finally block would swallow exceptions, so return here instead.
    return list;
}
From source file:GUI.ReadFile.java
public boolean readTrace(String fileName) {
    FileReader fileReader;
    CSVParser csvFileParser;
    boolean isSuccess = true;
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(TRACE_HEADER_MAPPING);
    try {
        ArrayList<String> Activity_set = new ArrayList<String>();
        HashSet<String> ID_set = new HashSet<String>();
        traces = new ArrayList<Trace>();
        // Initialize the FileReader object
        System.out.println(fileName);
        fileReader = new FileReader(fileName);
        // Initialize the CSVParser object
        csvFileParser = new CSVParser(fileReader, csvFileFormat);
        // Get a list of CSV file records
        List<CSVRecord> csvRecords = csvFileParser.getRecords();
        Trace t = new Trace("");
        // Read the CSV file records starting from the second record to skip the header
        for (int i = 1; i < csvRecords.size(); i++) {
            CSVRecord record = csvRecords.get(i);
            String ID = record.get(CaseID);
            if (!ID_set.contains(ID) || (i == csvRecords.size() - 1)) {
                // Discard the void trace created before the first record
                if (i != 1) {
                    traces.add(t);
                }
                ID_set.add(ID);
                t = new Trace(ID);
            }
            Activity ac = new Activity(record.get(Activity), record.get(StartTime),
                    record.get(CompleteTime), record.get(Timestamp));
            t.add_activity(ac);
            if (!Activity_set.contains(ac.get_name())) {
                Activity_set.add(ac.get_name());
            }
        }
        // Sort the activity set lexicographically
        Collections.sort(Activity_set);
        // Sort traces by ID
        Collections.sort(traces, new Comparator<Trace>() {
            @Override
            public int compare(Trace t1, Trace t2) {
                return Integer.parseInt(t1.get_ID()) < Integer.parseInt(t2.get_ID()) ? -1 : 1;
            }
        });
        // Set the activity set for each trace
        for (Trace T : traces) {
            T.set_ActivitySet((List<String>) Activity_set.clone());
        }
    } catch (Exception e) {
        System.out.println("Error in CsvFileReader !!!");
        e.printStackTrace();
        isSuccess = false;
        return isSuccess;
    }
    if (isSuccess) {
        try {
            fileReader.close();
            csvFileParser.close();
        } catch (IOException e) {
            System.out.println("Error while closing fileReader/csvFileParser !!!");
        }
    }
    return isSuccess;
}
From source file:com.datafibers.kafka.connect.FileGenericSourceTask.java
/**
 * Decode a CSV line into a Struct according to the schema from the Confluent schema registry.
 * @param line the raw CSV line to decode
 * @return the decoded Struct, or null if the line is empty
 */
public Struct structDecodingFromCsv(String line) {
    if (line.length() > 0) {
        Struct struct = new Struct(dataSchema);
        JsonNode json = null;
        try {
            // TODO support other types of files from here
            CSVParser csvParser = CSVFormat.EXCEL.withIgnoreEmptyLines().withIgnoreHeaderCase()
                    .withRecordSeparator('\n').withQuote('"').withEscape('\\').withDelimiter(',')
                    .withTrim().parse(new StringReader(line));
            // Since this is a single-line parser, we only get element 0
            CSVRecord entry = csvParser.getRecords().get(0);
            List<org.apache.kafka.connect.data.Field> fields = dataSchema.fields();
            int schema_fields_size = fields.size();
            log.info("schema_fields_size = " + schema_fields_size);
            for (int index = 0; index <= schema_fields_size - 1; index++) {
                Object value = null;
                org.apache.kafka.connect.data.Field theField = fields.get(index);
                log.info("printed indexed " + index + " fields: " + theField.name() + ":"
                        + theField.schema().type());
                if (theField != null) {
                    switch (theField.schema().type()) {
                    case STRING: {
                        value = entry.get(index);
                        break;
                    }
                    case INT32: {
                        value = Integer.parseInt(entry.get(index));
                        break;
                    }
                    case BOOLEAN: {
                        value = Boolean.parseBoolean(entry.get(index));
                        break;
                    }
                    default:
                        value = entry.get(index);
                    }
                }
                struct.put(theField.name(), value);
            }
        } catch (IOException ex) {
            throw new ConnectException(String.format("Unable to parse %s into a valid CSV", filename), ex);
        }
        return struct;
    }
    return null;
}
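For one-off in-memory lines like this, Commons CSV also provides the static CSVParser.parse(String, CSVFormat); a minimal sketch with a placeholder input line:

// Sketch only: the line content "a,b,c" is an illustrative placeholder.
// Both calls declare IOException.
CSVParser oneLineParser = CSVParser.parse("a,b,c", CSVFormat.DEFAULT);
CSVRecord first = oneLineParser.getRecords().get(0); // single-line input yields one record
System.out.println(first.get(0)); // "a"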
From source file:com.archimatetool.csv.importer.CSVImporter.java
/**
 * Get all records for a CSV file.
 * This is a brute-force approach: try a comma delimiter first. If that fails,
 * try a semicolon, and if that fails, a tab.
 *
 * @param file The file to open
 * @return Records, which may be empty but never null
 * @throws IOException
 */
List<CSVRecord> getRecords(File file) throws IOException {
    List<CSVRecord> records = new ArrayList<CSVRecord>();
    CSVParser parser = null;
    String errorMessage = "invalid char between encapsulated token and delimiter"; //$NON-NLS-1$
    try {
        parser = new CSVParser(new FileReader(file), CSVFormat.DEFAULT);
        records = parser.getRecords();
    } catch (IOException ex) {
        if (parser != null) {
            parser.close();
        }
        if (ex.getMessage() != null && ex.getMessage().contains(errorMessage)) {
            try {
                parser = new CSVParser(new FileReader(file), CSVFormat.DEFAULT.withDelimiter(';'));
                records = parser.getRecords();
            } catch (IOException ex2) {
                if (parser != null) {
                    parser.close();
                }
                if (ex2.getMessage() != null && ex2.getMessage().contains(errorMessage)) {
                    parser = new CSVParser(new FileReader(file), CSVFormat.DEFAULT.withDelimiter('\t'));
                    records = parser.getRecords();
                } else {
                    throw ex2;
                }
            }
        } else {
            throw ex;
        }
    } finally {
        if (parser != null) {
            parser.close();
        }
    }
    return records;
}
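The nested try/catch above can be flattened into a loop over candidate delimiters; a sketch that keeps the same assumption that a wrong delimiter surfaces as the quoted parse-error message:

// Sketch: try each candidate delimiter in turn. The delimiter list and the
// reliance on the error-message text are assumptions carried over from the
// code above, not guarantees of the library.
static List<CSVRecord> getRecordsProbing(File file) throws IOException {
    String errorMessage = "invalid char between encapsulated token and delimiter";
    char[] candidates = { ',', ';', '\t' };
    IOException last = null;
    for (char delimiter : candidates) {
        try (CSVParser parser = new CSVParser(new FileReader(file),
                CSVFormat.DEFAULT.withDelimiter(delimiter))) {
            return parser.getRecords();
        } catch (IOException ex) {
            if (ex.getMessage() == null || !ex.getMessage().contains(errorMessage)) {
                throw ex; // not a delimiter problem, rethrow
            }
            last = ex; // wrong delimiter, try the next candidate
        }
    }
    throw last;
}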
From source file:com.ggvaidya.scinames.model.Dataset.java
/**
 * Load this dataset from a CSV file. We load the entire CSV file, except
 * for blank cells.
 *
 * @param csvFormat The CSV format of the input file.
 * @param csvFile The input file to load.
 * @return The loaded Dataset.
 * @throws IOException
 */
public static Dataset fromCSV(CSVFormat csvFormat, File csvFile) throws IOException {
    Dataset dataset = new Dataset(csvFile.getName(), new SimplifiedDate(), Dataset.TYPE_CHECKLIST);

    // Get ready to filter input files.
    InputStream ins = new FileInputStream(csvFile);

    // Look for BOMs and discard!
    ins = new BOMInputStream(ins, false);

    // Convert into a Reader.
    Reader reader = new BufferedReader(new InputStreamReader(ins));

    // Load CSV
    CSVParser parser = csvFormat.withHeader().parse(reader);
    Map<String, Integer> headerMap = parser.getHeaderMap();

    dataset.setColumns(headerMap.entrySet().stream().sorted((Object o1, Object o2) -> {
        Map.Entry<String, Integer> e1 = (Map.Entry) o1;
        Map.Entry<String, Integer> e2 = (Map.Entry) o2;
        return e1.getValue().compareTo(e2.getValue());
    }).map(e -> e.getKey()).map(colName -> DatasetColumn.of(colName))
    /*
    .map(col -> {
        // Rename any renamedColumns.
        if (renamedColumns.containsKey(col))
            return renamedColumns.get(col);
        else
            return col;
    })
    */
    .collect(Collectors.toList()));

    dataset.rows.clear();
    dataset.rows.addAll(parser.getRecords().stream().map(record -> {
        DatasetRow row = new DatasetRow(dataset);
        row.putAll(record.toMap());
        return row;
    }).collect(Collectors.toList()));

    return dataset;
}
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java
/**
 * Iterates through the detection output file: first it leaves updates blank, based on
 * the number of missing updates per key. Once it has reached the missing-update count,
 * it removes the expected GSI values as specified by 'missingGsiExpectedHashValues'.
 * Note that once the blank count is reached, it also starts adding updates.
 * It then iterates over the rows again and adds Yes/No/Invalid values in the delete column.
 * It returns all error records if present; otherwise it returns all records.
 */
private static List<List<String>> createCorrectionFile(final String detectionFile, final String correctionFile,
        final String gsiHashKeyName, final String gsiHashKeyType, final String gsiRangeKeyName,
        final String gsiRangeKeyType, final Map<String, String> tableHashToNewGsiHashValueMap,
        final Map<String, String> tableHashToNewGsiRangeValueMap, final int missingUpdatesPerKey,
        final int missingGsiExpectedHashValues, final int invalidValuesForDelete, final int numOfYesForDelete,
        final int numOfNoForDelete) throws IOException {
    List<List<String>> errorRecords = null;
    List<List<String>> allRecords = null;

    BufferedReader br = null;
    BufferedWriter bw = null;
    CSVParser parser = null;
    CSVPrinter csvPrinter = null;
    try {
        br = new BufferedReader(new FileReader(new File(detectionFile)));
        bw = new BufferedWriter(new FileWriter(new File(correctionFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);
        csvPrinter = new CSVPrinter(bw, TestUtils.csvFormat);
        List<CSVRecord> detectorRecords = parser.getRecords();

        int hashMissingUpdates = 0;
        int rangeMissingUpdates = 0;
        int missingGsiExpectedHashValuesCurrent = 0;

        // Print header
        Map<String, Integer> header = parser.getHeaderMap();
        csvPrinter.printRecord(header.keySet());

        allRecords = new ArrayList<List<String>>();
        for (CSVRecord csvRecord : detectorRecords) {
            List<String> newRecord = new ArrayList<String>();
            String tableHashKeyRecorded = csvRecord.get(ViolationRecord.TABLE_HASH_KEY);

            String hashKeyViolationType = null;
            if (gsiHashKeyName != null) {
                hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE);
            }
            String rangeKeyViolationType = null;
            if (gsiRangeKeyName != null) {
                rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE);
            }

            for (int i = 0; i < csvRecord.size(); i++) {
                newRecord.add(i, csvRecord.get(i));
            }

            String newGsiVal = null;
            if (hashKeyViolationType != null && (hashKeyViolationType.equals("Size Violation")
                    || hashKeyViolationType.equals("Type Violation"))) {
                if (hashMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    hashMissingUpdates++;
                    continue;
                }
                // Remove expected hash values
                if (missingGsiExpectedHashValuesCurrent < missingGsiExpectedHashValues) {
                    newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY));
                    newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY), "");
                    missingGsiExpectedHashValuesCurrent++;
                }
                newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiHashKeyType, 4 /* length */);
                newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiHashValueMap.put(tableHashKeyRecorded, newGsiVal);
            }
            if (rangeKeyViolationType != null && (rangeKeyViolationType.equals("Size Violation")
                    || rangeKeyViolationType.equals("Type Violation"))) {
                if (rangeMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    rangeMissingUpdates++;
                    continue;
                }
                // Cast to int so List.remove(int) is called rather than remove(Object),
                // which would silently remove nothing from a List<String>.
                newRecord.remove((int) header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiRangeKeyType, 4 /* length */);
                newRecord.add(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiRangeValueMap.put(tableHashKeyRecorded, newGsiVal);
            }
            allRecords.add(newRecord);
        }

        // Add 'Y' or 'N' for the delete column
        if (numOfNoForDelete > 0 || numOfYesForDelete > 0 || invalidValuesForDelete > 0) {
            errorRecords = new ArrayList<List<String>>();
            int numOfYesAdded = 0;
            int numOfNoAdded = 0;
            int numOfInvalids = 0;
            for (List<String> record : allRecords) {
                if (numOfInvalids < invalidValuesForDelete) {
                    record.remove((int) header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "xx");
                    numOfInvalids++;
                    errorRecords.add(record);
                    continue;
                }
                if (numOfYesAdded < numOfYesForDelete) {
                    record.remove((int) header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "Y");
                    numOfYesAdded++;
                    continue;
                }
                if (numOfNoAdded < numOfNoForDelete) {
                    record.remove((int) header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "N");
                    numOfNoAdded++;
                    continue;
                }
            }
        }
        // Add all records to the file
        csvPrinter.printRecords(allRecords);
    } finally {
        br.close();
        bw.close();
        parser.close();
        csvPrinter.close();
    }
    if (errorRecords != null)
        return errorRecords;
    else
        return allRecords;
}