List of usage examples for org.apache.commons.csv CSVParser getRecords
public List<CSVRecord> getRecords() throws IOException

Parses the remaining CSV input and returns the content as a list of CSVRecords. The call consumes the rest of the input, so the parser should be closed afterwards.
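Before the examples from real projects, a minimal self-contained sketch of the typical call pattern (the file name "data.csv" and the "name" column are illustrative placeholders, not taken from any project below):

import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class GetRecordsExample {
    public static void main(String[] args) throws IOException {
        // "data.csv" and the "name" column are placeholders.
        try (Reader reader = Files.newBufferedReader(Paths.get("data.csv"));
                CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withFirstRecordAsHeader())) {
            // getRecords() reads the rest of the input into memory in one call
            List<CSVRecord> records = parser.getRecords();
            for (CSVRecord record : records) {
                System.out.println(record.get("name"));
            }
        }
    }
}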
From source file:javalibs.CSVDataNormalizer.java
private void readCSV() {
    try {
        CSVParser parser = new CSVParser(
                Files.newBufferedReader(Paths.get(this.csvPath)),
                CSVFormat.DEFAULT.withHeader().withIgnoreHeaderCase().withTrim());
        // Get all headers in the CSV file so they can be used later when writing the file
        this.headerMap = parser.getHeaderMap();
        // Add the records to the list for later use
        this.allRecords = parser.getRecords();
        parser.close();
        reverseHeaderMap();
    } catch (IOException e) {
        log_.die(e);
    }
}
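The same read can be expressed with try-with-resources so the parser is closed even when getRecords() throws; a sketch assuming the same csvPath, headerMap, and allRecords members as above:

// Sketch only: csvPath, headerMap, allRecords and log_ are the members used above.
try (CSVParser parser = new CSVParser(
        Files.newBufferedReader(Paths.get(this.csvPath)),
        CSVFormat.DEFAULT.withHeader().withIgnoreHeaderCase().withTrim())) {
    this.headerMap = parser.getHeaderMap();
    this.allRecords = parser.getRecords();
} catch (IOException e) {
    log_.die(e);
}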
From source file:net.sourceforge.ganttproject.io.GanttCSVOpen.java
/**
 * Create tasks from file.
 *
 * @throws IOException on parse error or input read-failure
 */
public boolean load() throws IOException {
    CSVParser parser = new CSVParser(myInputSupplier.get(),
            CSVFormat.DEFAULT.withEmptyLinesIgnored(false).withSurroundingSpacesIgnored(true));
    int numGroup = 0;
    RecordGroup currentGroup = null;
    boolean searchHeader = true;
    List<CSVRecord> records = parser.getRecords();
    for (CSVRecord record : records) {
        if (record.size() == 0) {
            // If the line is empty then the current record group is probably finished.
            // Let's search for the next group header.
            searchHeader = true;
            continue;
        }
        if (searchHeader) {
            // Record is not empty and we're searching for a header.
            if (numGroup < myRecordGroups.size() && myRecordGroups.get(numGroup).isHeader(record)) {
                // If the next group acknowledges the header, then we give it the turn;
                // otherwise it was just an empty line in the current group.
                searchHeader = false;
                currentGroup = myRecordGroups.get(numGroup);
                parser.readHeader(record);
                currentGroup.setHeader(Lists.newArrayList(record.iterator()));
                numGroup++;
                continue;
            }
            searchHeader = false;
        }
        assert currentGroup != null;
        currentGroup.process(record);
    }
    for (RecordGroup group : myRecordGroups) {
        group.postProcess();
    }
    // Succeeded
    return true;
}
From source file:com.siemens.sw360.portal.portlets.admin.UserPortlet.java
private List<UserCSV> getUsersFromRequest(PortletRequest request, String fileUploadFormId)
        throws IOException, TException {
    final UploadPortletRequest uploadPortletRequest = PortalUtil.getUploadPortletRequest(request);
    final InputStream stream = uploadPortletRequest.getFileAsStream(fileUploadFormId);
    Reader reader = new InputStreamReader(stream);
    CSVFormat format = CommonUtils.sw360CsvFormat;
    CSVParser parser = new CSVParser(reader, format);
    List<CSVRecord> records = parser.getRecords();
    if (records.size() > 0) {
        records.remove(0); // Remove header
    }
    return getUsersFromCSV(records);
}
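Rather than calling records.remove(0) to drop the header, the format itself can skip it; a sketch assuming CSVFormat.DEFAULT is an acceptable stand-in for sw360CsvFormat:

// Sketch: withFirstRecordAsHeader() consumes the first record as the header,
// so getRecords() returns data rows only and no manual remove(0) is needed.
CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withFirstRecordAsHeader());
List<CSVRecord> records = parser.getRecords();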
From source file:com.ibm.watson.developer_cloud.professor_languo.model.stack_exchange.CorpusBuilderTest.java
private void deserialiezd_duplicate_threads_should_match_original_duplicate_threads() throws IngestionException {
    String csvFilePath = dupCorpusBuilder.getDupThreadDirPath()
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_NAME
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_EXTENSION;
    File csvData = new File(csvFilePath);
    CSVParser parser;
    List<CSVRecord> records;
    try {
        parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.TDF.withHeader());
        records = parser.getRecords();
    } catch (IOException e) {
        throw new IngestionException(e);
    }
    Set<StackExchangeThread> dupThreadSet = dupCorpusBuilder.getDupThreadSetFromBinFiles();
    for (StackExchangeThread thread : dupThreadSet) {
        String binfileName = dupCorpusBuilder.getDupThreadDirPath() + thread.getId()
                + StackExchangeConstants.BIN_FILE_SUFFIX;
        CSVRecord matchRecord = null;
        for (CSVRecord record : records) {
            if (Integer.parseInt(record.get(0)) == thread.getId()) {
                matchRecord = record;
                break;
            }
        }
        assertTrue(matchRecord != null);
        // TODO haven't checked the originId yet since it requires the new
        // method to get origin id from
        String deserTitle = matchRecord.get(1), deserBody = matchRecord.get(2),
                deserFileName = matchRecord.get(4), deserTags = matchRecord.get(5);
        assertEquals(deserTitle, thread.getQuestion().getTitle());
        assertEquals(deserBody, thread.getQuestion().getBody());
        assertEquals(deserFileName, binfileName);
        assertEquals(deserTags, thread.getConcatenatedTagsText());
    }
}
From source file:com.teamnx.util.CSVToDateBase.java
/**
 * @param fileName the CSV file to read
 * @param type     which entity type the file contains
 * @return a list of the parsed entities, or null on failure
 */
public List readCsvFile(String fileName, int type) {
    FileReader fileReader = null;
    CSVParser csvFileParser = null;
    List list = null;
    // CSVFormat with header mapping
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(file_header);
    try {
        // Initialize the FileReader object
        fileReader = new FileReader(fileName);
        // Initialize the CSVParser object
        csvFileParser = new CSVParser(fileReader, csvFileFormat);
        // Get the list of CSV records
        List<CSVRecord> csvRecords = csvFileParser.getRecords();
        // Map the CSV records to entities, skipping the header row (index 0)
        switch (type) {
        case USER:
            List<User> userList = new ArrayList<User>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                User user = new User();
                user.setId(record.get("id"));
                user.setName(record.get("name"));
                user.setPassword(record.get("password"));
                user.setDepartment_id(Integer.parseInt(record.get("department_id")));
                user.setCharacter(Integer.parseInt(record.get("character")));
                user.setClass_id(record.get("class_id"));
                user.setDepartment_name(record.get("department_name"));
                userList.add(user);
            }
            list = userList;
            break;
        case DEPARTMENT:
            List<Department> departmentList = new ArrayList<Department>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                Department department = new Department();
                department.setId(Integer.parseInt(record.get("id")));
                department.setName(record.get("name"));
                departmentList.add(department);
            }
            list = departmentList;
            break;
        case COURSE:
            List<Course> courseList = new ArrayList<Course>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                Course course = new Course();
                course.setId(record.get("id"));
                course.setName(record.get("name"));
                course.setDepartment_id(Integer.parseInt(record.get("department_id")));
                course.setStart_time(Integer.parseInt(record.get("start_time")));
                course.setEnd_time(Integer.parseInt(record.get("end_time")));
                course.setPosition(record.get("position"));
                course.setSchedule(record.get("schedule"));
                course.setYear(Integer.parseInt(record.get("year")));
                course.setSemester(Integer.parseInt(record.get("semester")));
                int j = Integer.parseInt(record.get("category"));
                course.setCategory(j == 1);
                course.setMax_member(Integer.parseInt(record.get("max_member")));
                courseList.add(course);
            }
            list = courseList;
            break;
        case STUDENT_COURSE:
            List<StudentCourse> studentCourseList = new ArrayList<StudentCourse>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                StudentCourse studentCourse = new StudentCourse();
                studentCourse.setId(record.get("id"));
                studentCourse.setCourseId(record.get("course_id"));
                studentCourse.setStudentId(record.get("student_id"));
                studentCourseList.add(studentCourse);
            }
            list = studentCourseList;
            break;
        case TEACHER_COURSE:
            List<TeacherCourse> teacherCourseList = new ArrayList<TeacherCourse>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                TeacherCourse teacherCourse = new TeacherCourse();
                teacherCourse.setId(record.get("id"));
                teacherCourse.setTeacherId(record.get("teacher_id"));
                teacherCourse.setCourseId(record.get("course_id"));
                teacherCourseList.add(teacherCourse);
            }
            list = teacherCourseList;
            break;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close quietly; guard against nulls in case construction failed.
        try {
            if (fileReader != null) {
                fileReader.close();
            }
            if (csvFileParser != null) {
                csvFileParser.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    // Returning from inside a finally block would swallow exceptions, so return here instead.
    return list;
}
From source file:GUI.ReadFile.java
public boolean readTrace(String fileName) {
    FileReader fileReader;
    CSVParser csvFileParser;
    boolean isSuccess = true;
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(TRACE_HEADER_MAPPING);
    try {
        ArrayList<String> Activity_set = new ArrayList<String>();
        HashSet<String> ID_set = new HashSet<String>();
        traces = new ArrayList<Trace>();
        // Initialize the FileReader object
        System.out.println(fileName);
        fileReader = new FileReader(fileName);
        // Initialize the CSVParser object
        csvFileParser = new CSVParser(fileReader, csvFileFormat);
        // Get a list of CSV file records
        List<CSVRecord> csvRecords = csvFileParser.getRecords();
        Trace t = new Trace("");
        // Read the CSV file records starting from the second record to skip the header
        for (int i = 1; i < csvRecords.size(); i++) {
            CSVRecord record = csvRecords.get(i);
            String ID = record.get(CaseID);
            if (!ID_set.contains(ID) || (i == csvRecords.size() - 1)) {
                // Discard the void trace created before the first record
                if (i != 1) {
                    traces.add(t);
                }
                ID_set.add(ID);
                t = new Trace(ID);
            }
            Activity ac = new Activity(record.get(Activity), record.get(StartTime),
                    record.get(CompleteTime), record.get(Timestamp));
            t.add_activity(ac);
            if (!Activity_set.contains(ac.get_name())) {
                Activity_set.add(ac.get_name());
            }
        }
        // Sort the activity set lexicographically
        Collections.sort(Activity_set);
        // Sort traces by ID
        Collections.sort(traces, new Comparator<Trace>() {
            @Override
            public int compare(Trace t1, Trace t2) {
                return Integer.parseInt(t1.get_ID()) < Integer.parseInt(t2.get_ID()) ? -1 : 1;
            }
        });
        // Set the activity set for each trace
        for (Trace T : traces) {
            T.set_ActivitySet((List<String>) Activity_set.clone());
        }
    } catch (Exception e) {
        System.out.println("Error in CsvFileReader !!!");
        e.printStackTrace();
        isSuccess = false;
        return isSuccess;
    }
    if (isSuccess) {
        try {
            fileReader.close();
            csvFileParser.close();
        } catch (IOException e) {
            System.out.println("Error while closing fileReader/csvFileParser !!!");
        }
    }
    return isSuccess;
}
From source file:com.datafibers.kafka.connect.FileGenericSourceTask.java
/**
 * Decode a CSV line into a Struct according to the schema from the Confluent schema registry.
 * @param line the raw CSV line to decode
 * @return the decoded Struct, or null if the line is empty
 */
public Struct structDecodingFromCsv(String line) {
    if (line.length() > 0) {
        Struct struct = new Struct(dataSchema);
        JsonNode json = null;
        try {
            // TODO support other types of files from here
            CSVParser csvParser = CSVFormat.EXCEL.withIgnoreEmptyLines().withIgnoreHeaderCase()
                    .withRecordSeparator('\n').withQuote('"').withEscape('\\').withDelimiter(',')
                    .withTrim().parse(new StringReader(line));
            // Since this is a single-line parser, we only get element 0
            CSVRecord entry = csvParser.getRecords().get(0);
            List<org.apache.kafka.connect.data.Field> fields = dataSchema.fields();
            int schema_fields_size = fields.size();
            log.info("schema_fields_size = " + schema_fields_size);
            for (int index = 0; index <= schema_fields_size - 1; index++) {
                Object value = null;
                org.apache.kafka.connect.data.Field theField = fields.get(index);
                log.info("printed indexed " + index + " fields: " + theField.name() + ":"
                        + theField.schema().type());
                if (theField != null) {
                    switch (theField.schema().type()) {
                    case STRING: {
                        value = entry.get(index);
                        break;
                    }
                    case INT32: {
                        value = Integer.parseInt(entry.get(index));
                        break;
                    }
                    case BOOLEAN: {
                        value = Boolean.parseBoolean(entry.get(index));
                        break;
                    }
                    default:
                        value = entry.get(index);
                    }
                }
                struct.put(theField.name(), value);
            }
        } catch (IOException ex) {
            throw new ConnectException(String.format("Unable to parse %s into a valid CSV", filename), ex);
        }
        return struct;
    }
    return null;
}
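For one-off in-memory lines like this, Commons CSV also provides the static CSVParser.parse(String, CSVFormat); a minimal sketch with a placeholder input line:

// Sketch only: the line content "a,b,c" is an illustrative placeholder.
// Both calls declare IOException.
CSVParser oneLineParser = CSVParser.parse("a,b,c", CSVFormat.DEFAULT);
CSVRecord first = oneLineParser.getRecords().get(0); // single-line input yields one record
System.out.println(first.get(0)); // "a"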
From source file:com.archimatetool.csv.importer.CSVImporter.java
/**
 * Get all records for a CSV file.
 * This is a brute-force approach: try a comma delimiter first. If that fails,
 * try a semicolon, and if that fails, a tab.
 *
 * @param file The file to open
 * @return Records, which may be empty but never null
 * @throws IOException
 */
List<CSVRecord> getRecords(File file) throws IOException {
    List<CSVRecord> records = new ArrayList<CSVRecord>();
    CSVParser parser = null;
    String errorMessage = "invalid char between encapsulated token and delimiter"; //$NON-NLS-1$
    try {
        parser = new CSVParser(new FileReader(file), CSVFormat.DEFAULT);
        records = parser.getRecords();
    } catch (IOException ex) {
        if (parser != null) {
            parser.close();
        }
        if (ex.getMessage() != null && ex.getMessage().contains(errorMessage)) {
            try {
                parser = new CSVParser(new FileReader(file), CSVFormat.DEFAULT.withDelimiter(';'));
                records = parser.getRecords();
            } catch (IOException ex2) {
                if (parser != null) {
                    parser.close();
                }
                if (ex2.getMessage() != null && ex2.getMessage().contains(errorMessage)) {
                    parser = new CSVParser(new FileReader(file), CSVFormat.DEFAULT.withDelimiter('\t'));
                    records = parser.getRecords();
                } else {
                    throw ex2;
                }
            }
        } else {
            throw ex;
        }
    } finally {
        if (parser != null) {
            parser.close();
        }
    }
    return records;
}
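The nested try/catch above can be flattened into a loop over candidate delimiters; a sketch that keeps the same assumption that a wrong delimiter surfaces as the quoted parse-error message:

// Sketch: try each candidate delimiter in turn. The delimiter list and the
// reliance on the error-message text are assumptions carried over from the
// code above, not guarantees of the library.
static List<CSVRecord> getRecordsProbing(File file) throws IOException {
    String errorMessage = "invalid char between encapsulated token and delimiter";
    char[] candidates = { ',', ';', '\t' };
    IOException last = null;
    for (char delimiter : candidates) {
        try (CSVParser parser = new CSVParser(new FileReader(file),
                CSVFormat.DEFAULT.withDelimiter(delimiter))) {
            return parser.getRecords();
        } catch (IOException ex) {
            if (ex.getMessage() == null || !ex.getMessage().contains(errorMessage)) {
                throw ex; // not a delimiter problem, rethrow
            }
            last = ex; // wrong delimiter, try the next candidate
        }
    }
    throw last;
}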
From source file:com.ggvaidya.scinames.model.Dataset.java
/**
 * Load this dataset from a CSV file. We load the entire CSV file, except
 * for blank cells.
 *
 * @param csvFormat The CSV format of the input file.
 * @param csvFile The input file to load.
 * @return The loaded Dataset.
 * @throws IOException
 */
public static Dataset fromCSV(CSVFormat csvFormat, File csvFile) throws IOException {
    Dataset dataset = new Dataset(csvFile.getName(), new SimplifiedDate(), Dataset.TYPE_CHECKLIST);

    // Get ready to filter input files.
    InputStream ins = new FileInputStream(csvFile);

    // Look for BOMs and discard!
    ins = new BOMInputStream(ins, false);

    // Convert into a Reader.
    Reader reader = new BufferedReader(new InputStreamReader(ins));

    // Load CSV
    CSVParser parser = csvFormat.withHeader().parse(reader);
    Map<String, Integer> headerMap = parser.getHeaderMap();

    dataset.setColumns(headerMap.entrySet().stream().sorted((Object o1, Object o2) -> {
        Map.Entry<String, Integer> e1 = (Map.Entry) o1;
        Map.Entry<String, Integer> e2 = (Map.Entry) o2;
        return e1.getValue().compareTo(e2.getValue());
    }).map(e -> e.getKey()).map(colName -> DatasetColumn.of(colName))
    /*
    .map(col -> {
        // Rename any renamedColumns.
        if (renamedColumns.containsKey(col))
            return renamedColumns.get(col);
        else
            return col;
    })
    */
    .collect(Collectors.toList()));

    dataset.rows.clear();
    dataset.rows.addAll(parser.getRecords().stream().map(record -> {
        DatasetRow row = new DatasetRow(dataset);
        row.putAll(record.toMap());
        return row;
    }).collect(Collectors.toList()));

    return dataset;
}
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java
/**
 * Iterates through the detection output file: first it leaves updates blank, based on
 * the number of missing updates per key. Once it has reached the missing-update count,
 * it removes the expected GSI values as specified by 'missingGsiExpectedHashValues'.
 * Note that once the blank count is reached, it also starts adding updates.
 * It then iterates over the rows again and adds Yes/No/Invalid values in the delete column.
 * It returns all error records if present; otherwise it returns all records.
 */
private static List<List<String>> createCorrectionFile(final String detectionFile, final String correctionFile,
        final String gsiHashKeyName, final String gsiHashKeyType, final String gsiRangeKeyName,
        final String gsiRangeKeyType, final Map<String, String> tableHashToNewGsiHashValueMap,
        final Map<String, String> tableHashToNewGsiRangeValueMap, final int missingUpdatesPerKey,
        final int missingGsiExpectedHashValues, final int invalidValuesForDelete, final int numOfYesForDelete,
        final int numOfNoForDelete) throws IOException {
    List<List<String>> errorRecords = null;
    List<List<String>> allRecords = null;

    BufferedReader br = null;
    BufferedWriter bw = null;
    CSVParser parser = null;
    CSVPrinter csvPrinter = null;
    try {
        br = new BufferedReader(new FileReader(new File(detectionFile)));
        bw = new BufferedWriter(new FileWriter(new File(correctionFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);
        csvPrinter = new CSVPrinter(bw, TestUtils.csvFormat);
        List<CSVRecord> detectorRecords = parser.getRecords();

        int hashMissingUpdates = 0;
        int rangeMissingUpdates = 0;
        int missingGsiExpectedHashValuesCurrent = 0;

        // Print header
        Map<String, Integer> header = parser.getHeaderMap();
        csvPrinter.printRecord(header.keySet());

        allRecords = new ArrayList<List<String>>();
        for (CSVRecord csvRecord : detectorRecords) {
            List<String> newRecord = new ArrayList<String>();
            String tableHashKeyRecorded = csvRecord.get(ViolationRecord.TABLE_HASH_KEY);

            String hashKeyViolationType = null;
            if (gsiHashKeyName != null) {
                hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE);
            }
            String rangeKeyViolationType = null;
            if (gsiRangeKeyName != null) {
                rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE);
            }

            for (int i = 0; i < csvRecord.size(); i++) {
                newRecord.add(i, csvRecord.get(i));
            }

            String newGsiVal = null;
            if (hashKeyViolationType != null && (hashKeyViolationType.equals("Size Violation")
                    || hashKeyViolationType.equals("Type Violation"))) {
                if (hashMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    hashMissingUpdates++;
                    continue;
                }
                // Remove expected hash values
                if (missingGsiExpectedHashValuesCurrent < missingGsiExpectedHashValues) {
                    newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY));
                    newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY), "");
                    missingGsiExpectedHashValuesCurrent++;
                }
                newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiHashKeyType, 4 /* length */);
                newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiHashValueMap.put(tableHashKeyRecorded, newGsiVal);
            }
            if (rangeKeyViolationType != null && (rangeKeyViolationType.equals("Size Violation")
                    || rangeKeyViolationType.equals("Type Violation"))) {
                if (rangeMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    rangeMissingUpdates++;
                    continue;
                }
                // Cast to int so List.remove(int) is called rather than remove(Object),
                // which would silently remove nothing from a List<String>.
                newRecord.remove((int) header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiRangeKeyType, 4 /* length */);
                newRecord.add(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiRangeValueMap.put(tableHashKeyRecorded, newGsiVal);
            }
            allRecords.add(newRecord);
        }

        // Add 'Y' or 'N' for the delete column
        if (numOfNoForDelete > 0 || numOfYesForDelete > 0 || invalidValuesForDelete > 0) {
            errorRecords = new ArrayList<List<String>>();
            int numOfYesAdded = 0;
            int numOfNoAdded = 0;
            int numOfInvalids = 0;
            for (List<String> record : allRecords) {
                if (numOfInvalids < invalidValuesForDelete) {
                    record.remove((int) header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "xx");
                    numOfInvalids++;
                    errorRecords.add(record);
                    continue;
                }
                if (numOfYesAdded < numOfYesForDelete) {
                    record.remove((int) header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "Y");
                    numOfYesAdded++;
                    continue;
                }
                if (numOfNoAdded < numOfNoForDelete) {
                    record.remove((int) header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "N");
                    numOfNoAdded++;
                    continue;
                }
            }
        }
        // Add all records to the file
        csvPrinter.printRecords(allRecords);
    } finally {
        br.close();
        bw.close();
        parser.close();
        csvPrinter.close();
    }
    if (errorRecords != null)
        return errorRecords;
    else
        return allRecords;
}