Example usage for org.apache.commons.csv CSVParser getRecords

List of usage examples for org.apache.commons.csv CSVParser getRecords

Introduction

In this page you can find the example usage for org.apache.commons.csv CSVParser getRecords.

Prototype

public List<CSVRecord> getRecords() throws IOException 

Source Link

Document

Parses the CSV input according to the given format and returns the content as a list of CSVRecords.

Usage

From source file:javalibs.CSVDataNormalizer.java

/**
 * Reads the CSV file at {@code this.csvPath}, capturing its header map and all
 * records for later use. Any I/O failure is treated as fatal via log_.die().
 */
private void readCSV() {
    // try-with-resources guarantees the parser (and its underlying reader) is
    // closed even when getRecords() throws — the original only closed on the
    // success path, leaking the reader on any parse error.
    try (CSVParser parser = new CSVParser(Files.newBufferedReader(Paths.get(this.csvPath)),
            CSVFormat.DEFAULT.withHeader().withIgnoreHeaderCase().withTrim())) {

        // Get all headers in the CSV file so they can be used later when writing the file
        this.headerMap = parser.getHeaderMap();

        // Add them to the records list for later use
        this.allRecords = parser.getRecords();

        reverseHeaderMap();
    } catch (IOException e) {
        log_.die(e);
    }
}

From source file:net.sourceforge.ganttproject.io.GanttCSVOpen.java

/**
 * Create tasks from file.
 *
 * Reads the CSV input supplied by {@code myInputSupplier}, splitting it into
 * record groups separated by empty lines; each group's first non-empty record
 * may act as a header acknowledged by the next RecordGroup.
 *
 * @return true on success
 * @throws IOException
 *           on parse error or input read-failure
 */
public boolean load() throws IOException {
    // try-with-resources: the original never closed the parser, leaking the
    // reader obtained from myInputSupplier.
    try (CSVParser parser = new CSVParser(myInputSupplier.get(),
            CSVFormat.DEFAULT.withEmptyLinesIgnored(false).withSurroundingSpacesIgnored(true))) {
        int numGroup = 0;
        RecordGroup currentGroup = null;
        boolean searchHeader = true;
        for (CSVRecord record : parser.getRecords()) {
            if (record.size() == 0) {
                // If line is empty then current record group is probably finished.
                // Let's search for the next group header.
                searchHeader = true;
                continue;
            }
            if (searchHeader) {
                // Record is not empty and we're searching for header.
                if (numGroup < myRecordGroups.size() && myRecordGroups.get(numGroup).isHeader(record)) {
                    // If next group acknowledges the header, then we give it the turn,
                    // otherwise it was just an empty line in the current group
                    searchHeader = false;
                    currentGroup = myRecordGroups.get(numGroup);
                    parser.readHeader(record);
                    currentGroup.setHeader(Lists.newArrayList(record.iterator()));
                    numGroup++;
                    continue;
                }
                searchHeader = false;
            }
            assert currentGroup != null;
            currentGroup.process(record);
        }
    }
    for (RecordGroup group : myRecordGroups) {
        group.postProcess();
    }
    // Succeeded
    return true;
}

From source file:com.siemens.sw360.portal.portlets.admin.UserPortlet.java

/**
 * Extracts user rows from an uploaded CSV file attached to the portlet request.
 *
 * @param request          the portlet request carrying the upload
 * @param fileUploadFormId form field id of the uploaded file
 * @return users parsed from the CSV body (header row excluded)
 * @throws IOException on read/parse failure
 * @throws TException  propagated from getUsersFromCSV
 */
private List<UserCSV> getUsersFromRequest(PortletRequest request, String fileUploadFormId)
        throws IOException, TException {

    final UploadPortletRequest uploadPortletRequest = PortalUtil.getUploadPortletRequest(request);

    // try-with-resources closes stream, reader and parser — the original
    // leaked all three.
    try (InputStream stream = uploadPortletRequest.getFileAsStream(fileUploadFormId);
            Reader reader = new InputStreamReader(stream);
            CSVParser parser = new CSVParser(reader, CommonUtils.sw360CsvFormat)) {
        List<CSVRecord> records = parser.getRecords();
        if (!records.isEmpty()) {
            records.remove(0); // Remove header
        }
        return getUsersFromCSV(records);
    }
}

From source file:com.ibm.watson.developer_cloud.professor_languo.model.stack_exchange.CorpusBuilderTest.java

/**
 * Verifies that duplicate threads deserialized from binary files match the
 * rows recorded in the duplicate-thread TSV file (title, body, bin file name,
 * concatenated tags).
 *
 * @throws IngestionException wrapping any IOException raised while parsing the TSV
 */
private void deserialiezd_duplicate_threads_should_match_original_duplicate_threads()
        throws IngestionException {

    String csvFilePath = dupCorpusBuilder.getDupThreadDirPath()
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_NAME
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_EXTENSION;
    File csvData = new File(csvFilePath);

    List<CSVRecord> records;
    // try-with-resources: the original never closed the parser, leaking the
    // file handle for the TSV.
    try (CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(),
            CSVFormat.TDF.withHeader())) {
        records = parser.getRecords();
    } catch (IOException e) {
        throw new IngestionException(e);
    }

    Set<StackExchangeThread> dupThreadSet = dupCorpusBuilder.getDupThreadSetFromBinFiles();
    for (StackExchangeThread thread : dupThreadSet) {
        String binfileName = dupCorpusBuilder.getDupThreadDirPath() + thread.getId()
                + StackExchangeConstants.BIN_FILE_SUFFIX;
        // Locate the TSV row whose column 0 (thread id) matches this thread.
        CSVRecord matchRecord = null;
        for (CSVRecord record : records) {
            if (Integer.parseInt(record.get(0)) == thread.getId()) {
                matchRecord = record;
                break;
            }
        }
        assertTrue(matchRecord != null);
        // TODO haven't check the originId yet since it requires the new
        // method to get origin id from
        String deserTitle = matchRecord.get(1), deserBody = matchRecord.get(2),
                deserFileName = matchRecord.get(4), deserTags = matchRecord.get(5);
        assertEquals(deserTitle, thread.getQuestion().getTitle());
        assertEquals(deserBody, thread.getQuestion().getBody());
        assertEquals(deserFileName, binfileName);
        assertEquals(deserTags, thread.getConcatenatedTagsText());
    }
}

From source file:com.teamnx.util.CSVToDateBase.java

/**
 * @param fileName/*  w  w w.  j av a 2s . co  m*/
 * @param type
 * @return
 */
public List readCsvFile(String fileName, int type) {
    FileReader fileReader = null;
    CSVParser csvFileParser = null;
    List list = null;
    //CSVFormatheader mapping
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(file_header);
    try {
        //?FileReader object
        fileReader = new FileReader(fileName);
        //? CSVParser object
        csvFileParser = new CSVParser(fileReader, csvFileFormat);
        //CSVrecords
        List<CSVRecord> csvRecords = csvFileParser.getRecords();
        // CSV

        switch (type) {
        case USER:
            List<User> userList = new ArrayList<User>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                //?
                User user = new User();
                user.setId(record.get("id"));
                user.setName(record.get("name"));
                user.setPassword(record.get("password"));
                user.setDepartment_id(Integer.parseInt(record.get("department_id")));
                user.setCharacter(Integer.parseInt(record.get("character")));
                user.setClass_id(record.get("class_id"));
                user.setDepartment_name(record.get("department_name"));
                userList.add(user);
            }
            list = userList;
            break;
        case DEPARTMENT:
            List<Department> departmentList = new ArrayList<Department>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                //?
                Department department = new Department();
                department.setId(Integer.parseInt(record.get("id")));
                department.setName(record.get("name"));
                departmentList.add(department);
            }
            list = departmentList;
            break;
        case COURSE:
            List<Course> courseList = new ArrayList<Course>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                //?
                Course course = new Course();
                course.setId(record.get("id"));
                course.setName(record.get("name"));
                course.setDepartment_id(Integer.parseInt(record.get("department_id")));
                course.setStart_time(Integer.parseInt(record.get("start_time")));
                course.setEnd_time(Integer.parseInt(record.get("end_time")));
                course.setPosition(record.get("position"));
                course.setSchedule(record.get("schedule"));
                course.setYear(Integer.parseInt(record.get("year")));
                course.setSemester(Integer.parseInt(record.get("semester")));
                int j = Integer.parseInt(record.get("category"));
                course.setCategory(j == 1 ? true : false);
                course.setMax_member(Integer.parseInt(record.get("max_member")));
                courseList.add(course);
            }
            list = courseList;
            break;
        case STUDENT_COURSE:
            List<StudentCourse> studentCourseList = new ArrayList<StudentCourse>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                StudentCourse studentCourse = new StudentCourse();
                studentCourse.setId(record.get("id"));
                studentCourse.setCourseId(record.get("course_id"));
                studentCourse.setStudentId(record.get("student_id"));
                studentCourseList.add(studentCourse);
            }
            list = studentCourseList;
            break;
        case TEACHER_COURSE:
            List<TeacherCourse> teacherCourseList = new ArrayList<TeacherCourse>();
            for (int i = 1; i < csvRecords.size(); i++) {
                CSVRecord record = csvRecords.get(i);
                TeacherCourse teacherCourse = new TeacherCourse();
                teacherCourse.setId(record.get("id"));
                teacherCourse.setTeacherId(record.get("teacher_id"));
                teacherCourse.setCourseId(record.get("course_id"));
                teacherCourseList.add(teacherCourse);
            }
            list = teacherCourseList;
            break;

        }

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            fileReader.close();
            csvFileParser.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            return list;
        }
    }
}

From source file:GUI.ReadFile.java

/**
 * Reads an event-trace CSV file and rebuilds {@code traces}: rows are grouped
 * into Trace objects by case ID, activities are collected into a sorted
 * activity set which is then attached to every trace.
 *
 * @param fileName path of the CSV trace file
 * @return true on success, false if reading or parsing failed
 */
public boolean readTrace(String fileName) {
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(TRACE_HEADER_MAPPING);

    // try-with-resources: the original only closed fileReader/csvFileParser on
    // the success path, leaking both whenever the parsing loop threw.
    try (FileReader fileReader = new FileReader(fileName);
            CSVParser csvFileParser = new CSVParser(fileReader, csvFileFormat)) {
        ArrayList<String> Activity_set = new ArrayList<String>();
        HashSet<String> ID_set = new HashSet<String>();
        traces = new ArrayList<Trace>();
        System.out.println(fileName);

        // Get a list of CSV file records
        List<CSVRecord> csvRecords = csvFileParser.getRecords();
        Trace t = new Trace("");
        // Read the CSV file records starting from the second record to skip the header
        for (int i = 1; i < csvRecords.size(); i++) {
            CSVRecord record = csvRecords.get(i);
            String ID = record.get(CaseID);
            if (!ID_set.contains(ID) || (i == csvRecords.size() - 1)) {
                // Discard void trace
                if (i != 1) {
                    traces.add(t);
                }
                ID_set.add(ID);
                t = new Trace(ID);
            }
            Activity ac = new Activity(record.get(Activity), record.get(StartTime), record.get(CompleteTime),
                    record.get(Timestamp));
            t.add_activity(ac);

            if (!Activity_set.contains(ac.get_name())) {
                Activity_set.add(ac.get_name());
            }
        }
        // sort activity set by string
        Collections.sort(Activity_set);

        // sort trace by ID
        Collections.sort(traces, new Comparator<Trace>() {
            @Override
            public int compare(Trace t1, Trace t2) {
                return Integer.parseInt(t1.get_ID()) < Integer.parseInt(t2.get_ID()) ? -1 : 1;
            }
        });
        // Set activity set for each trace; copy constructor replaces the
        // unchecked (List<String>) clone() cast of the original.
        for (Trace T : traces) {
            T.set_ActivitySet(new ArrayList<String>(Activity_set));
        }
        return true;
    } catch (Exception e) {
        System.out.println("Error in CsvFileReader !!!");
        e.printStackTrace();
        return false;
    }
}

From source file:com.datafibers.kafka.connect.FileGenericSourceTask.java

/**
 * Decode Csv to struct according to schema form Confluent schema registry.
 *
 * @param line a single CSV-formatted line
 * @return struct of decoded values, or null when the line is empty
 * @throws ConnectException wrapping any IOException from the CSV parser
 */
public Struct structDecodingFromCsv(String line) {
    if (line.length() > 0) {
        Struct struct = new Struct(dataSchema);
        // try-with-resources: the original never closed the parser.
        // TODO support other type of files from here
        try (CSVParser csvParser = CSVFormat.EXCEL.withIgnoreEmptyLines().withIgnoreHeaderCase()
                .withRecordSeparator('\n').withQuote('"').withEscape('\\').withDelimiter(',').withTrim()
                .parse(new StringReader(line))) {

            // Since this is single line parser, we get element 0 only
            CSVRecord entry = csvParser.getRecords().get(0);
            List<org.apache.kafka.connect.data.Field> fields = dataSchema.fields();
            int schema_fields_size = fields.size();
            log.info("schema_fields_size = " + schema_fields_size);

            for (int index = 0; index < schema_fields_size; index++) {
                // Note: the original guarded `theField != null` AFTER already
                // dereferencing it in the log call; fields.get(index) cannot
                // return null here, so the dead check is removed.
                org.apache.kafka.connect.data.Field theField = fields.get(index);
                log.info("printed indexed " + index + " fields: " + theField.name() + ":"
                        + theField.schema().type());
                Object value;
                switch (theField.schema().type()) {
                case INT32: {
                    value = Integer.parseInt(entry.get(index));
                    break;
                }
                case BOOLEAN: {
                    value = Boolean.parseBoolean(entry.get(index));
                    break;
                }
                case STRING:
                default:
                    // STRING and any unhandled type fall back to the raw cell.
                    value = entry.get(index);
                }
                struct.put(theField.name(), value);
            }
        } catch (IOException ex) {
            throw new ConnectException(String.format("Unable to parse %s into a valid CSV", filename), ex);
        }
        return struct;
    }
    return null;
}

From source file:com.archimatetool.csv.importer.CSVImporter.java

/**
 * Get all records for a CSV file.
 * This is a brute-force approach to try with a comma delimiter first. If that fails then
 * try a semicolon, and if that fails, a tab.
 * 
 * @param file The file to open
 * @return Records, which may be empty but never null
 * @throws IOException if parsing fails with all three delimiters, or for any
 *         other I/O error
 */
List<CSVRecord> getRecords(File file) throws IOException {
    String errorMessage = "invalid char between encapsulated token and delimiter"; //$NON-NLS-1$

    try {
        return parseRecords(file, CSVFormat.DEFAULT);
    } catch (IOException ex) {
        // Only a delimiter-mismatch error triggers the fallback; anything else
        // is rethrown unchanged.
        if (ex.getMessage() == null || !ex.getMessage().contains(errorMessage)) {
            throw ex;
        }
        try {
            return parseRecords(file, CSVFormat.DEFAULT.withDelimiter(';'));
        } catch (IOException ex2) {
            if (ex2.getMessage() == null || !ex2.getMessage().contains(errorMessage)) {
                throw ex2;
            }
            return parseRecords(file, CSVFormat.DEFAULT.withDelimiter('\t'));
        }
    }
}

/**
 * Parses one file with one format; try-with-resources guarantees the parser
 * (and the FileReader it wraps, which the original leaked when the CSVParser
 * constructor threw) is always closed.
 */
private static List<CSVRecord> parseRecords(File file, CSVFormat format) throws IOException {
    try (CSVParser parser = new CSVParser(new FileReader(file), format)) {
        return parser.getRecords();
    }
}

From source file:com.ggvaidya.scinames.model.Dataset.java

/**
 * Load this dataset from a CSV file. We load the entire CSV file, except
 * for blank cells./*from   w ww .ja  v a  2 s  . c om*/
 * 
 * @param project The project to which the resulting Dataset should belong
 * @param csvFormat The CSV format of the input file.
 * @param csvFile The input file to load.
 * @param renamedColumns Rename these columns on the fly.
 * @return
 * @throws IOException 
 */
public static Dataset fromCSV(CSVFormat csvFormat, File csvFile) throws IOException {
    Dataset dataset = new Dataset(csvFile.getName(), new SimplifiedDate(), Dataset.TYPE_CHECKLIST);

    // Get ready to filter input files.
    InputStream ins = new FileInputStream(csvFile);

    // Look for BOMs and discard!
    ins = new BOMInputStream(ins, false);

    // Convert into a Reader.
    Reader reader = new BufferedReader(new InputStreamReader(ins));

    // Load CSV
    CSVParser parser = csvFormat.withHeader().parse(reader);
    Map<String, Integer> headerMap = parser.getHeaderMap();

    dataset.setColumns(headerMap.entrySet().stream().sorted((Object o1, Object o2) -> {
        Map.Entry<String, Integer> e1 = (Map.Entry) o1;
        Map.Entry<String, Integer> e2 = (Map.Entry) o2;

        return e1.getValue().compareTo(e2.getValue());
    }).map(e -> e.getKey()).map(colName -> DatasetColumn.of(colName))
            /*
            .map(col -> {
               // Rename any renamedColumns.
               if(renamedColumns.containsKey(col))
            return renamedColumns.get(col);
               else
            return col;
            })*/
            .collect(Collectors.toList()));

    dataset.rows.clear();
    dataset.rows.addAll(parser.getRecords().stream().map(record -> {
        DatasetRow row = new DatasetRow(dataset);
        row.putAll(record.toMap());
        return row;
    }).collect(Collectors.toList()));

    return dataset;
}

From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java

/**
 * Iterates through detection output file: first leave updates blank based on missing updates per key.
 * Once it has reached the missing update number, it removes the expected gsi values as per the specified 'missingGsiExpectedHashValues'.
 * Note that once blank number is reached, it also starts adding updates.
 * It then iterates over the rows again and adds values for Yes/No/Invalid in the delete column.
 * It returns all error records, if present. If not, it returns all records.
 *
 * @throws IOException on read/write failure for either file
 */
private static List<List<String>> createCorrectionFile(final String detectionFile, final String correctionFile,
        final String gsiHashKeyName, final String gsiHashKeyType, final String gsiRangeKeyName,
        final String gsiRangeKeyType, final Map<String, String> tableHashToNewGsiHashValueMap,
        final Map<String, String> tableHashToNewGsiRangeValueMap, final int missingUpdatesPerKey,
        final int missingGsiExpectedHashValues, final int invalidValuesForDelete, final int numOfYesForDelete,
        final int numOfNoForDelete) throws IOException {

    List<List<String>> errorRecords = null;
    List<List<String>> allRecords = null;

    // try-with-resources replaces the original finally block, which would have
    // thrown NullPointerException (masking the real exception) if any of the
    // four resources failed to open.
    try (BufferedReader br = new BufferedReader(new FileReader(new File(detectionFile)));
            BufferedWriter bw = new BufferedWriter(new FileWriter(new File(correctionFile)));
            CSVParser parser = new CSVParser(br, TestUtils.csvFormat);
            CSVPrinter csvPrinter = new CSVPrinter(bw, TestUtils.csvFormat)) {
        List<CSVRecord> detectorRecords = parser.getRecords();

        int hashMissingUpdates = 0;
        int rangeMissingUpdates = 0;
        int missingGsiExpectedHashValuesCurrent = 0;

        // Print Header
        Map<String, Integer> header = parser.getHeaderMap();
        csvPrinter.printRecord(header.keySet());

        allRecords = new ArrayList<List<String>>();
        for (CSVRecord csvRecord : detectorRecords) {
            List<String> newRecord = new ArrayList<String>();
            String tableHashKeyRecorded = csvRecord.get(ViolationRecord.TABLE_HASH_KEY);

            String hashKeyViolationType = null;
            if (gsiHashKeyName != null) {
                hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE);
            }
            String rangeKeyViolationType = null;
            if (gsiRangeKeyName != null) {
                rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE);
            }

            // Start from a copy of the detector row; corrections edit it in place.
            for (int i = 0; i < csvRecord.size(); i++) {
                newRecord.add(i, csvRecord.get(i));
            }

            String newGsiVal = null;
            if (hashKeyViolationType != null && (hashKeyViolationType.equals("Size Violation")
                    || hashKeyViolationType.equals("Type Violation"))) {
                // First leave 'missingUpdatesPerKey' rows without an update value.
                if (hashMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    hashMissingUpdates++;
                    continue;
                }
                // Remove expected hash Values
                if (missingGsiExpectedHashValuesCurrent < missingGsiExpectedHashValues) {
                    newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY));
                    newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY), "");
                    missingGsiExpectedHashValuesCurrent++;
                }

                newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiHashKeyType, 4 /*length*/);
                newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiHashValueMap.put(tableHashKeyRecorded, newGsiVal);
            }

            if (rangeKeyViolationType != null && (rangeKeyViolationType.equals("Size Violation")
                    || rangeKeyViolationType.equals("Type Violation"))) {
                if (rangeMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    rangeMissingUpdates++;
                    continue;
                }

                newRecord.remove(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiRangeKeyType, 4 /*length*/);
                newRecord.add(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiRangeValueMap.put(tableHashKeyRecorded, newGsiVal);
            }
            allRecords.add(newRecord);
        }

        // Add 'Y' or 'N' for delete column
        if (numOfNoForDelete > 0 || numOfYesForDelete > 0 || invalidValuesForDelete > 0) {
            errorRecords = new ArrayList<List<String>>();
            int numOfYesAdded = 0;
            int numOfNoAdded = 0;
            int numOfInvalids = 0;
            for (List<String> record : allRecords) {
                if (numOfInvalids < invalidValuesForDelete) {
                    record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "xx");
                    numOfInvalids++;
                    errorRecords.add(record);
                    continue;
                }

                if (numOfYesAdded < numOfYesForDelete) {
                    record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "Y");
                    numOfYesAdded++;
                    continue;
                }

                if (numOfNoAdded < numOfNoForDelete) {
                    record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "N");
                    numOfNoAdded++;
                    continue;
                }
            }
        }

        // Add all records to file
        csvPrinter.printRecords(allRecords);
    }

    if (errorRecords != null)
        return errorRecords;
    else
        return allRecords;
}