Example usage for org.apache.commons.csv CSVParser getRecords

Introduction

This page collects example usages of the org.apache.commons.csv.CSVParser.getRecords() method.

Prototype

public List<CSVRecord> getRecords() throws IOException

Source Link

Document

Parses the CSV input according to the given format and returns the content as a list of CSVRecords.

Usage

From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationDetectionTest.java

/**
 * Verifies the contents of the violation detection output file.
 *
 * <p>Every CSV row of {@code detectorOpFile} is read and bucketed by the violation type reported
 * for the GSI hash key and for the GSI range key, and the description column of each is checked
 * against the expected text. Each non-null list of expected violations is then matched against
 * its bucket; an expected item that is missing, or a row that no expected item accounts for,
 * fails the test.
 *
 * @param detectorOpFile path of the CSV output file to check
 * @param tableHashKey name of the table hash key attribute in the expected items
 * @param tableRangeKey name of the table range key attribute, or {@code null} to skip the range
 *        key comparison
 * @param tableHashKeyType not read by this method
 * @param tableRangeKeyType not read by this method
 * @param violatedValueExists whether the GSI key value columns are read and verified
 * @param hashKeySizeViolations items expected to be reported as GSI hash key size violations,
 *        or {@code null} to skip that check
 * @param rangeKeySizeViolations items expected to be reported as GSI range key size violations,
 *        or {@code null} to skip that check
 * @param hashKeyTypeViolations items expected to be reported as GSI hash key type violations,
 *        or {@code null} to skip that check
 * @param rangeKeyTypeViolations items expected to be reported as GSI range key type violations,
 *        or {@code null} to skip that check
 * @param violatedSize size expected in the "Found:" part of size violation descriptions
 * @param gsiHashKeyName name of the GSI hash key attribute in the expected items
 * @param gsiHashKeyType GSI hash key type expected in type violation descriptions
 * @param gsiRangeKeyName name of the GSI range key attribute in the expected items
 * @param gsiRangeKeyType GSI range key type expected in type violation descriptions
 * @throws IOException if the output file cannot be opened, parsed or closed
 */
private void checkViolationFileOutput(String detectorOpFile, String tableHashKey, String tableRangeKey,
        String tableHashKeyType, String tableRangeKeyType, boolean violatedValueExists,
        List<Map<String, AttributeValue>> hashKeySizeViolations,
        List<Map<String, AttributeValue>> rangeKeySizeViolations,
        List<Map<String, AttributeValue>> hashKeyTypeViolations,
        List<Map<String, AttributeValue>> rangeKeyTypeViolations, int violatedSize, String gsiHashKeyName,
        String gsiHashKeyType, String gsiRangeKeyName, String gsiRangeKeyType) throws IOException {

    // Table hash key -> table range key, one map per (violated GSI key, violation kind).
    Map<String, String> hashToRangeHashSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToRangeRangeSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToRangeHashTypeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToRangeRangeTypeViolationsMap = new HashMap<String, String>();

    // Table hash key -> violating GSI key value; only filled when violatedValueExists is set.
    Map<String, String> hashToGsiHashSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToGsiRangeSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToGsiHashTypeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToGsiRangeTypeViolationsMap = new HashMap<String, String>();

    BufferedReader br = null;
    CSVParser parser = null;
    try {
        br = new BufferedReader(new FileReader(new File(detectorOpFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);
        List<CSVRecord> csvRecords = parser.getRecords();
        for (CSVRecord csvRecord : csvRecords) {
            String hashKey = csvRecord.get(ViolationRecord.TABLE_HASH_KEY);
            String rangeKey = csvRecord.get(ViolationRecord.TABLE_RANGE_KEY);
            String gsiHashKeyValue = null;
            if (violatedValueExists) {
                gsiHashKeyValue = csvRecord.get(ViolationRecord.GSI_HASH_KEY);
            }
            String hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE);
            String hashKeyViolationDesc = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_DESC);
            String gsiRangeKeyValue = null;
            if (violatedValueExists) {
                gsiRangeKeyValue = csvRecord.get(ViolationRecord.GSI_RANGE_KEY);
            }
            String rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE);
            String rangeKeyViolationDesc = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_DESC);

            boolean foundViolation = false;
            if (hashKeyViolationType.equals("Size Violation")) {
                foundViolation = true;
                hashToRangeHashSizeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiHashSizeViolationsMap.put(hashKey, gsiHashKeyValue);
                }
                Assert.assertEquals("Gsi hash key size violation description is incorrect",
                        "Max Bytes Allowed: " + TestUtils.MAX_HASH_KEY_SIZE + " Found: " + violatedSize,
                        hashKeyViolationDesc);
            } else if (hashKeyViolationType.equals("Type Violation")) {
                foundViolation = true;
                hashToRangeHashTypeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiHashTypeViolationsMap.put(hashKey, gsiHashKeyValue);
                }
                Assert.assertEquals("Gsi hash key type violation description is incorrect",
                        "Expected: " + gsiHashKeyType + " Found: "
                                + TestUtils.returnDifferentAttributeType(gsiHashKeyType),
                        hashKeyViolationDesc);
            } else {
                Assert.assertEquals(
                        "Hash key violation description exists even when there is no violation type", "",
                        hashKeyViolationDesc);
            }

            if (rangeKeyViolationType.equals("Size Violation")) {
                foundViolation = true;
                hashToRangeRangeSizeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiRangeSizeViolationsMap.put(hashKey, gsiRangeKeyValue);
                }
                Assert.assertEquals("GSI range key size violation description is incorrect",
                        "Max Bytes Allowed: " + TestUtils.MAX_RANGE_KEY_SIZE + " Found: " + violatedSize,
                        rangeKeyViolationDesc);
            } else if (rangeKeyViolationType.equals("Type Violation")) {
                foundViolation = true;
                hashToRangeRangeTypeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiRangeTypeViolationsMap.put(hashKey, gsiRangeKeyValue);
                }
                Assert.assertEquals("Gsi range key type violation description is incorrect",
                        "Expected: " + gsiRangeKeyType + " Found: "
                                + TestUtils.returnDifferentAttributeType(gsiRangeKeyType),
                        rangeKeyViolationDesc);
            } else {
                Assert.assertEquals(
                        "Range key violation description exists even when there is no violation type", "",
                        rangeKeyViolationDesc);
            }

            Assert.assertTrue("No violation found in a row!", foundViolation);
        }

        assertExpectedViolationsReported(hashKeySizeViolations, "size", "hash", tableHashKey, tableRangeKey,
                gsiHashKeyName, violatedValueExists, hashToRangeHashSizeViolationsMap,
                hashToGsiHashSizeViolationsMap);
        assertExpectedViolationsReported(rangeKeySizeViolations, "size", "range", tableHashKey, tableRangeKey,
                gsiRangeKeyName, violatedValueExists, hashToRangeRangeSizeViolationsMap,
                hashToGsiRangeSizeViolationsMap);
        assertExpectedViolationsReported(hashKeyTypeViolations, "type", "hash", tableHashKey, tableRangeKey,
                gsiHashKeyName, violatedValueExists, hashToRangeHashTypeViolationsMap,
                hashToGsiHashTypeViolationsMap);
        assertExpectedViolationsReported(rangeKeyTypeViolations, "type", "range", tableHashKey, tableRangeKey,
                gsiRangeKeyName, violatedValueExists, hashToRangeRangeTypeViolationsMap,
                hashToGsiRangeTypeViolationsMap);

    } finally {
        // Either resource may still be null if opening the file failed; guarding the calls keeps
        // the original exception from being replaced by a NullPointerException.
        try {
            if (parser != null) {
                parser.close();
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }
}

/**
 * Matches one list of expected violations against the rows collected from the output file,
 * removing every matched row from the given maps and failing if anything is left over.
 *
 * @param expectedViolations items expected to be reported, or {@code null} to skip the check
 * @param violationKind lower-case violation kind used in the assertion messages ("size" or "type")
 * @param keyKind violated GSI key used in the assertion messages ("hash" or "range")
 * @param tableHashKey name of the table hash key attribute in the expected items
 * @param tableRangeKey name of the table range key attribute, or {@code null} to skip the range
 *        key comparison
 * @param gsiKeyName name of the violated GSI key attribute in the expected items
 * @param violatedValueExists whether the GSI key value is verified as well
 * @param hashToRangeMap table hash key to table range key, as read from the output file
 * @param hashToGsiValueMap table hash key to GSI key value, as read from the output file
 */
private void assertExpectedViolationsReported(List<Map<String, AttributeValue>> expectedViolations,
        String violationKind, String keyKind, String tableHashKey, String tableRangeKey, String gsiKeyName,
        boolean violatedValueExists, Map<String, String> hashToRangeMap,
        Map<String, String> hashToGsiValueMap) {
    if (expectedViolations == null) {
        return;
    }

    // "size" -> "Size"; Character.toUpperCase is locale-independent.
    String capitalizedKind = Character.toUpperCase(violationKind.charAt(0)) + violationKind.substring(1);

    for (Map<String, AttributeValue> item : expectedViolations) {
        String expectedTableHashKey = AttributeValueConverter.toBlankString(item.get(tableHashKey));

        if (!hashToRangeMap.containsKey(expectedTableHashKey)) {
            Assert.fail("Expected " + violationKind + " violation on " + keyKind + " key not found!");
        }
        if (tableRangeKey != null) {
            String expectedTableRangeKey = AttributeValueConverter.toBlankString(item.get(tableRangeKey));
            Assert.assertEquals(
                    capitalizedKind + " violated GSI " + keyKind
                            + " key's table's hash key's range key does not match in the output!",
                    expectedTableRangeKey, hashToRangeMap.get(expectedTableHashKey));
        }
        hashToRangeMap.remove(expectedTableHashKey);

        // Check for the gsi key value
        if (violatedValueExists) {
            String expectedGsiValue = AttributeValueConverter.toStringWithAttributeType(item.get(gsiKeyName));
            Assert.assertEquals(capitalizedKind + " violated Gsi " + keyKind + " value mis-match",
                    expectedGsiValue, hashToGsiValueMap.get(expectedTableHashKey));
            hashToGsiValueMap.remove(expectedTableHashKey);
        }
    }

    Assert.assertEquals("Extra entries found for gsi " + keyKind + " key " + violationKind + " violations", 0,
            hashToRangeMap.size());
    Assert.assertEquals(
            "Extra entries found for gsi " + keyKind + " key " + violationKind + " violation values", 0,
            hashToGsiValueMap.size());
}

From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java

/**
 * Validates the output of violation correction.
 *
 * <p>Each row of {@code correctionOutputFile} must, once its error column is dropped, equal one
 * of the entries in {@code errorRecords}. Matched entries are removed from {@code errorRecords},
 * so the caller's list is modified.
 *
 * @param correctionOutputFile path of the CSV file written by the correction run
 * @param errorRecords expected rows, without the error column
 * @throws IOException if the file cannot be opened, parsed or closed
 */
private void validateCorrectionOutput(String correctionOutputFile, List<List<String>> errorRecords)
        throws IOException {
    BufferedReader br = null;
    CSVParser parser = null;
    try {
        br = new BufferedReader(new FileReader(new File(correctionOutputFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);
        List<CSVRecord> records = parser.getRecords();

        Assert.assertEquals("Error record count does not match", errorRecords.size(), records.size());
        for (CSVRecord record : records) {
            boolean foundError = false;
            List<String> readRecord = new ArrayList<String>();
            for (int i = 0; i < record.size(); i++) {
                // NOTE(review): columns are compared by value, so any column whose text equals
                // the error column's text is dropped as well - confirm this is intended.
                if (record.get(i).equals(record.get(ViolationRecord.GSI_VALUE_UPDATE_ERROR))) {
                    foundError = true;
                } else {
                    readRecord.add(record.get(i));
                }
            }
            Assert.assertTrue("Error column not found", foundError);
            Assert.assertTrue("Unexpected record read from correction output",
                    errorRecords.contains(readRecord));
            errorRecords.remove(readRecord);
        }
    } finally {
        // Either resource may still be null if opening the file failed; guarding the calls keeps
        // the original exception from being replaced by a NullPointerException.
        try {
            if (parser != null) {
                parser.close();
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }
}

From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java

/**
 * Splits a CSV-formatted string into the field values of its first record.
 *
 * @param val text to parse with {@code CSVFormat.DEFAULT}
 * @return the values of the first record, in order; an empty array if {@code val} contains no
 *         record at all (for example an empty string)
 * @throws IOException if the text cannot be parsed
 */
public static String[] splitStr(String val) throws IOException {

    CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT);
    try {
        Iterator<CSVRecord> records = parser.getRecords().iterator();
        if (!records.hasNext()) {
            // Nothing was parsed; there are no fields to return.
            return new String[0];
        }
        CSVRecord record = records.next();
        String[] input = new String[record.size()];
        for (int i = 0; i < input.length; i++) {
            input[i] = record.get(i);
        }
        return input;
    } finally {
        // Release the parser even when parsing fails.
        parser.close();
    }
}

From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java

/**
 * String Parsing /*from ww w  .  j av a2  s .  c o m*/
 */

public static String[] splitStr(String val, Integer len) throws IOException {

    String[] input;

    try {
        CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT);
        CSVRecord record = parser.getRecords().get(0);
        input = new String[len];
        Iterator<String> valuesIt = record.iterator();
        int i = 0;
        while (valuesIt.hasNext()) {
            input[i] = valuesIt.next().trim();
            i++;
        }
        parser.close();
    } catch (ArrayIndexOutOfBoundsException e) {
        input = val.split(",", len);
        for (int i = 0; i < input.length; i++)
            input[i] = input[i].trim();
    }

    return input;
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java

/**
 * Reads a CSV stream in two passes and writes it to {@code finalOut} as tab-delimited rows.
 *
 * <p>The header row defines one {@code DataVariable} per column. The first pass examines every
 * value to decide, per column, whether it is numeric (and integer), a time, a date, or a plain
 * string, while copying the records into a temporary file. The second pass re-reads that
 * temporary file and writes each record to {@code finalOut}, formatted according to the column
 * types determined in the first pass.
 *
 * @param csvReader source of the CSV data; closed by this method after the first pass
 * @param dataTable receives the variable list, the variable count and the case count
 * @param finalOut destination of the tab-delimited output; closed by this method
 * @return the number of records written in the second pass
 * @throws IOException if a header name is null or empty, if a record's size does not match the
 *         header, if the two passes see a different number of records, or on an I/O failure
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {

    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    Map<String, Integer> headers = parser.getHeaderMap();

    // Create one variable per header column; i tracks the column position.
    int i = 0;
    for (String varName : headers.keySet()) {
        if (varName == null || varName.isEmpty()) {
            // TODO:
            // Add a sensible variable name validation algorithm.
            // -- L.A. 4.0 alpha 1
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
        }

        DataVariable dv = new DataVariable();
        dv.setName(varName);
        dv.setLabel(varName);
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:NOTCALCULATED");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);

        // Every variable starts out as character/discrete; numeric columns are re-typed
        // after the first pass.
        dv.setTypeCharacter();
        dv.setIntervalDiscrete();
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
        i++;
    }

    dataTable.setVarQuantity((long) variableList.size());
    dataTable.setDataVariables(variableList);

    // Per-column type flags, indexed by column position.
    boolean[] isNumericVariable = new boolean[headers.size()];
    boolean[] isIntegerVariable = new boolean[headers.size()];
    boolean[] isTimeVariable = new boolean[headers.size()];
    boolean[] isDateVariable = new boolean[headers.size()];

    for (i = 0; i < headers.size(); i++) {
        // OK, let's assume that every variable is numeric;
        // but we'll go through the file and examine every value; the
        // moment we find a value that's not a legit numeric one, we'll
        // assume that it is in fact a String.
        isNumericVariable[i] = true;
        isIntegerVariable[i] = true;
        isDateVariable[i] = true;
        isTimeVariable[i] = true;
    }

    // First, "learning" pass.
    // (we'll save the incoming stream in another temp file:)
    // The format that first matched a column is remembered here, per column.
    SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
    SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];

    // NOTE(review): the temp file is only deleted on the normal path near the end of this
    // method; an exception thrown before that point leaves it behind.
    File firstPassTempFile = File.createTempFile("firstpass-", ".csv");

    try (CSVPrinter csvFilePrinter = new CSVPrinter(
            // TODO allow other parsers of tabular data to use this parser by changing inFormat
            new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) {
        // Write headers
        csvFilePrinter.printRecord(headers.keySet());
        for (CSVRecord record : parser.getRecords()) {
            // Checks if #records = #columns in header
            if (!record.isConsistent()) {
                // NOTE(review): getRecords() has already read the whole input by the time this
                // loop runs, so getCurrentLineNumber() presumably reports the end of the input
                // rather than the offending record - confirm.
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }

            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                // A column stays "integer" only while every value is empty, the literal "null",
                // or an allowed first character followed by digits only.
                isIntegerVariable[i] = isIntegerVariable[i] && varString != null
                        && (varString.isEmpty() || varString.equals("null")
                                || (firstNumCharSet.contains(varString.charAt(0))
                                        && StringUtils.isNumeric(varString.substring(1))));
                if (isNumericVariable[i]) {
                    // If variable might be "numeric" test to see if this value is a parsable number:
                    if (varString != null && !varString.isEmpty()) {

                        // NOTE(review): isNumeric and isInteger are never read.
                        boolean isNumeric = false;
                        boolean isInteger = false;

                        if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                            // Special tokens count as numeric; move on to the next column.
                            continue;
                        } else {
                            try {
                                // Parsed only to validate the token; the value itself is unused.
                                Double testDoubleValue = new Double(varString);
                                continue;
                            } catch (NumberFormatException ex) {
                                // the token failed to parse as a double
                                // so the column is a string variable.
                            }
                        }
                        isNumericVariable[i] = false;
                    }
                }

                // If this is not a numeric column, see if it is a date column
                // by parsing the cell as a date or date-time value:
                if (!isNumericVariable[i]) {

                    Date dateResult = null;

                    if (isTimeVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isTime = false;

                            if (selectedDateTimeFormat[i] != null) {
                                // A format was already chosen for this column; the value must
                                // match it in full.
                                ParsePosition pos = new ParsePosition(0);
                                dateResult = selectedDateTimeFormat[i].parse(varString, pos);

                                if (dateResult != null && pos.getIndex() == varString.length()) {
                                    // OK, successfully parsed a value!
                                    isTime = true;
                                }
                            } else {
                                // No format chosen yet: take the first one that consumes the
                                // whole value.
                                for (SimpleDateFormat format : TIME_FORMATS) {
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = format.parse(varString, pos);
                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                        selectedDateTimeFormat[i] = format;
                                        break;
                                    }
                                }
                            }
                            if (!isTime) {
                                isTimeVariable[i] = false;
                                // if the token didn't parse as a time value,
                                // we will still try to parse it as a date, below.
                                // unless this column is NOT a date.
                            } else {
                                // And if it is a time value, we are going to assume it's
                                // NOT a date.
                                isDateVariable[i] = false;
                            }
                        }
                    }

                    if (isDateVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isDate = false;

                            // TODO:
                            // Strictly speaking, we should be doing the same thing
                            // here as with the time formats above; select the
                            // first one that works, then insist that all the
                            // other values in this column match it... but we
                            // only have one, as of now, so it should be ok.
                            // -- L.A. 4.0 beta
                            for (SimpleDateFormat format : DATE_FORMATS) {
                                // Strict parsing - it will throw an
                                // exception if it doesn't parse!
                                format.setLenient(false);
                                try {
                                    format.parse(varString);
                                    isDate = true;
                                    selectedDateFormat[i] = format;
                                    break;
                                } catch (ParseException ex) {
                                    //Do nothing
                                }
                            }
                            isDateVariable[i] = isDate;
                        }
                    }
                }
            }

            // Copy the record to the temp file for the second pass.
            csvFilePrinter.printRecord(record);
        }
    }
    // The number of records read in the first pass is the expected case count.
    dataTable.setCaseQuantity(parser.getRecordNumber());
    parser.close();
    csvReader.close();

    // Re-type the variables that we've determined are numerics:
    for (i = 0; i < headers.size(); i++) {
        if (isNumericVariable[i]) {
            dataTable.getDataVariables().get(i).setTypeNumeric();

            if (isIntegerVariable[i]) {
                dataTable.getDataVariables().get(i).setIntervalDiscrete();
            } else {
                dataTable.getDataVariables().get(i).setIntervalContinuous();
            }
        } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
            // Dates are still Strings, i.e., they are "character" and "discrete";
            // But we add special format values for them:
            // NOTE(review): the pattern comes from DATE_FORMATS[0], not selectedDateFormat[i].
            dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("date");
        } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
            // Same for time values:
            dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("time");
        }
    }
    // Second, final pass.
    try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) {
        parser = new CSVParser(secondPassReader, inFormat.withHeader());
        // Reused for every record; each slot is overwritten before the row is printed.
        String[] caseRow = new String[headers.size()];

        for (CSVRecord record : parser) {
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }

            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                if (isNumericVariable[i]) {
                    if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                        // Missing value - represented as an empty string in
                        // the final tab file
                        caseRow[i] = "";
                    } else if (varString.equalsIgnoreCase("NaN")) {
                        // "Not a Number" special value:
                        caseRow[i] = "NaN";
                    } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                        // Positive infinity:
                        caseRow[i] = "Inf";
                    } else if (varString.equalsIgnoreCase("-Inf")) {
                        // Negative infinity:
                        caseRow[i] = "-Inf";
                    } else if (varString.equalsIgnoreCase("null")) {
                        // By request from Gus - "NULL" is recognized as a
                        // numeric zero:
                        caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                    } else {
                        /* No re-formatting is done on any other numeric values.
                         * We'll save them as they were, for archival purposes.
                         * The alternative solution - formatting in sci. notation
                         * is commented-out below.
                         */
                        caseRow[i] = varString;
                        /*
                         if (isIntegerVariable[i]) {
                        try {
                            Integer testIntegerValue = new Integer(varString);
                            caseRow[i] = testIntegerValue.toString();
                        } catch (NumberFormatException ex) {
                            throw new IOException("Failed to parse a value recognized as an integer in the first pass! (?)");
                        }
                        } else {
                        try {
                            Double testDoubleValue = new Double(varString);
                            if (testDoubleValue.equals(0.0)) {
                                caseRow[i] = "0.0";
                            } else {
                                                                    // One possible implementation:
                                //
                                // Round our fractional values to 15 digits
                                // (minimum number of digits of precision guaranteed by
                                // type Double) and format the resulting representations
                                // in a IEEE 754-like "scientific notation" - for ex.,
                                // 753.24 will be encoded as 7.5324e2
                                BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext);
                                caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal);
                                
                                // Strip meaningless zeros and extra + signs:
                                caseRow[i] = caseRow[i].replaceFirst("00*e", "e");
                                caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e");
                                caseRow[i] = caseRow[i].replaceFirst("e\\+00", "");
                                caseRow[i] = caseRow[i].replaceFirst("^\\+", "");
                            }
                        } catch (NumberFormatException ex) {
                            throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)");
                        }
                        }
                         */
                    }
                } else if (isTimeVariable[i] || isDateVariable[i]) {
                    // Time and Dates are stored NOT quoted (don't ask).
                    if (varString != null) {
                        // Dealing with quotes:
                        // remove the leading and trailing quotes, if present:
                        varString = varString.replaceFirst("^\"*", "");
                        varString = varString.replaceFirst("\"*$", "");
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "";
                    }
                } else {
                    // Treat as a String:
                    // Strings are stored in tab files quoted;
                    // Missing values are stored as an empty string
                    // between two tabs (or one tab and the new line);
                    // Empty strings stored as "" (quoted empty string).
                    // For the purposes  of this CSV ingest reader, we are going
                    // to assume that all the empty strings in the file are
                    // indeed empty strings, and NOT missing values:
                    if (varString != null) {
                        // escape the quotes, newlines, and tabs:
                        varString = varString.replace("\"", "\\\"");
                        varString = varString.replace("\n", "\\n");
                        varString = varString.replace("\t", "\\t");
                        // final pair of quotes:
                        varString = "\"" + varString + "\"";
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "\"\"";
                    }
                }
            }
            finalOut.println(StringUtils.join(caseRow, "\t"));
        }
    }
    // Number of records read back in the second pass.
    long linecount = parser.getRecordNumber();
    finalOut.close();
    parser.close();
    dbglog.fine("Tmp File: " + firstPassTempFile);
    // Firstpass file is deleted to prevent tmp from filling up.
    firstPassTempFile.delete();
    // Both passes must have seen the same number of records.
    if (dataTable.getCaseQuantity().intValue() != linecount) {
        List<String> args = Arrays
                .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount });
        throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
    }
    return (int) linecount;
}

From source file:net.tradelib.core.Series.java

/**
 * Builds a {@link Series} from a CSV file whose first column is a timestamp and whose
 * remaining columns are numeric values.
 *
 * <p>Lines starting with {@code #} are treated as comments and spaces surrounding values are
 * ignored. The number of value columns is taken from the first record.
 *
 * @param path   path of the CSV file to read
 * @param header whether the first record is a header row; when {@code true} the header names
 *               of the value columns (all but the first column) become the series names
 * @param dtf    formatter for the first column; when {@code null},
 *               {@link DateTimeFormatter#ISO_LOCAL_DATE_TIME} is used if {@code lt} is
 *               {@code null} and {@link DateTimeFormatter#ISO_DATE} otherwise
 * @param lt     when non-null, the first column is parsed as a date only and combined with
 *               this time of day; when {@code null} it is parsed as a full date-time
 * @return the populated series, or {@code null} if the file contains no data records
 * @throws Exception if the file cannot be read or a value cannot be parsed
 */
static public Series fromCsv(String path, boolean header, DateTimeFormatter dtf, LocalTime lt)
        throws Exception {

    if (dtf == null) {
        dtf = (lt == null) ? DateTimeFormatter.ISO_LOCAL_DATE_TIME : DateTimeFormatter.ISO_DATE;
    }

    CSVFormat csvFmt = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreSurroundingSpaces();
    if (header) {
        csvFmt = csvFmt.withHeader();
    }

    Series result = null;

    // Both the reader and the parser are closed on every exit path, including parse errors.
    try (BufferedReader in = new BufferedReader(new FileReader(path));
            CSVParser csv = csvFmt.parse(in)) {

        int ncols = -1;
        double[] values = null;

        for (CSVRecord rec : csv.getRecords()) {
            if (result == null) {
                // The first record fixes the number of value columns (everything after column 0).
                ncols = rec.size() - 1;
                values = new double[ncols];
                result = new Series(ncols);
            }

            for (int ii = 0; ii < ncols; ++ii) {
                values[ii] = Double.parseDouble(rec.get(ii + 1));
            }

            LocalDateTime ldt;
            if (lt != null) {
                ldt = LocalDate.parse(rec.get(0), dtf).atTime(lt);
            } else {
                ldt = LocalDateTime.parse(rec.get(0), dtf);
            }

            // NOTE(review): the same array instance is passed for every row; this presumably
            // relies on Series.append copying the values - confirm.
            result.append(ldt, values);
        }

        // A header-only file produces no records, so there is no series to name.
        if (header && result != null) {
            Map<String, Integer> headerMap = csv.getHeaderMap();
            result.clearNames();
            for (Map.Entry<String, Integer> me : headerMap.entrySet()) {
                // Column 0 is the timestamp; value columns are shifted down by one.
                if (me.getValue() > 0) {
                    result.setName(me.getKey(), me.getValue() - 1);
                }
            }
        }
    }

    return result;
}

From source file:nl.utwente.trafficanalyzer.GeoTagger.java

/**
 * Reads all records of a CSV file using {@code CSVFormat.DEFAULT}.
 *
 * <p>Failures are reported on standard output and never propagated to the caller.
 *
 * @param fileName the CSV file to read
 * @return the list of parsed records, or {@code null} if the file could not be opened or
 *         parsed
 */
public static List readCsvFile(File fileName) {

    FileReader fileReader = null;
    CSVParser csvFileParser = null;

    try {
        fileReader = new FileReader(fileName);
        csvFileParser = new CSVParser(fileReader, CSVFormat.DEFAULT);

        return csvFileParser.getRecords();
    } catch (Exception e) {
        System.out.println("Error in CsvFileReader !!!");
        e.printStackTrace();
    } finally {
        // Either reference is still null when its constructor threw (e.g. the file does not
        // exist); closing unconditionally would raise a NullPointerException from this
        // finally block and defeat the "return null on error" contract.
        try {
            if (csvFileParser != null) {
                csvFileParser.close();
            }
            if (fileReader != null) {
                fileReader.close();
            }
        } catch (IOException e) {
            System.out.println("Error while closing fileReader/csvFileParser !!!");
            e.printStackTrace();
        }
    }
    return null;

}

From source file:no.uio.medicine.virsurveillance.parsers.CSVsGBDdata.java

/**
 * Reads every {@code .zip} file in the given folder, parses each zip entry as a
 * comma-delimited CSV with a header row, and stores the matching rows through {@code sqlM}.
 *
 * <p>If the path is not a directory a message is printed and nothing else happens.
 *
 * @param deathFolder path of the folder containing the zip archives
 * @throws IOException if the folder cannot be listed, or an archive or entry cannot be read
 */
public void parse(String deathFolder) throws IOException {
    File f = new File(deathFolder);
    if (!f.isDirectory()) {
        System.out.println("Not a directory");
        return;
    }

    String[] filesInDir = f.list();
    if (filesInDir == null) {
        // File.list() returns null when an I/O error occurs while reading the directory.
        throw new IOException("Unable to list directory: " + deathFolder);
    }

    Runtime runtime = Runtime.getRuntime();
    for (String fil : filesInDir) {
        if (!fil.endsWith(".zip")) {
            continue;
        }

        // try-with-resources guarantees the archive and every per-entry stream are released
        // even when reading or parsing fails part-way through.
        try (ZipFile zipFile = new ZipFile(deathFolder + "/" + fil)) {
            Enumeration<? extends ZipEntry> entries = zipFile.entries();

            while (entries.hasMoreElements()) {
                System.out.println(
                        "Used memory: " + (runtime.totalMemory() - runtime.freeMemory()) / (1024 * 1024)
                                + " Free memory: " + (runtime.freeMemory()) / (1024 * 1024));

                ZipEntry entry = entries.nextElement();
                try (InputStream stream = zipFile.getInputStream(entry);
                        BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
                        CSVParser parser = CSVFormat.RFC4180.withDelimiter(',').withIgnoreEmptyLines()
                                .withHeader().parse(br)) {

                    List<CSVRecord> records = parser.getRecords();
                    System.out.println("Reading records: " + zipFile.getName() + "/" + entry);
                    for (CSVRecord csvRecord : records) {
                        storeAllAgesRecord(csvRecord);
                    }
                }
            }
        }
    }
}

/**
 * Stores a single CSV row through {@code sqlM} when it belongs to age group 22 and provides
 * every required column; any other row is ignored. SQL failures are logged, not rethrown.
 *
 * @param csvRecord the row to inspect
 */
private void storeAllAgesRecord(CSVRecord csvRecord) {
    // Age group 22 corresponds to all ages; rows for any other group are skipped.
    if (!csvRecord.isMapped("age_group_id")
            || !csvRecord.get("age_group_id").equalsIgnoreCase("22")) {
        return;
    }

    String location = csvRecord.isMapped("location_code") ? csvRecord.get("location_code") : null;
    String year = csvRecord.isMapped("year") ? csvRecord.get("year") : null;
    String cause = csvRecord.isMapped("cause_name") ? csvRecord.get("cause_name") : null;
    String number = csvRecord.isMapped("mean") ? csvRecord.get("mean") : null;

    String sex = null;
    if (csvRecord.isMapped("sex_id")) {
        // 1 = male, 2 = female; any other value leaves sex unset so the row is dropped below.
        String sexId = csvRecord.get("sex_id");
        if (sexId.equalsIgnoreCase("1")) {
            sex = "m";
        } else if (sexId.equalsIgnoreCase("2")) {
            sex = "f";
        }
    }

    String metric = null;
    if (csvRecord.isMapped("metric") && csvRecord.isMapped("unit")) {
        metric = csvRecord.get("metric") + "-" + csvRecord.get("unit");
    }

    if (location == null || year == null || sex == null || cause == null || number == null
            || metric == null) {
        return;
    }

    try {
        sqlM.addSanitaryIssueToCountry(location, year, sex, cause, metric, number);
    } catch (SQLException ex) {
        Logger.getLogger(CSVsGBDdata.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java

/**
 * Serializes a collection of three resources without column information and checks that the
 * CSV output parses back into three records whose values all appear in the matching source
 * resource.
 */
@Test
public void testSerializeResources_NoColumnInfo() throws Exception {
    Result result = new ResultImpl(true);
    result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK));
    TreeNode<Resource> tree = result.getResultTree();

    // Three resources with four properties each. The second set of values contains an
    // apostrophe and the third a comma (the delimiter of CSVFormat.DEFAULT).
    List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>();
    for (String suffix : new String[] { "a", "'b", ",c" }) {
        TreeMap<String, Object> row = new TreeMap<String, Object>();
        for (int p = 1; p <= 4; p++) {
            row.put("property" + p, "value" + p + suffix);
        }
        data.add(row);
    }

    tree.setName("items");
    tree.setProperty("isCollection", "true");

    addChildResource(tree, "resource", 0, data.get(0));
    addChildResource(tree, "resource", 1, data.get(1));
    addChildResource(tree, "resource", 2, data.get(2));

    replayAll();

    //execute test
    String csv = new CsvSerializer().serialize(result).toString().replace("\r", "");

    verifyAll();

    assertNotNull(csv);

    // try-with-resources closes the parser even when one of the assertions below fails.
    try (CSVParser csvParser = new CSVParser(new StringReader(csv), CSVFormat.DEFAULT)) {
        List<CSVRecord> records = csvParser.getRecords();

        assertNotNull(records);
        assertEquals(3, records.size());

        int i = 0;
        for (CSVRecord record : records) {
            TreeMap<String, Object> expectedData = data.get(i++);
            assertEquals(expectedData.size(), record.size());

            for (String item : record) {
                assertTrue(expectedData.containsValue(item));
            }
        }
    }
}

From source file:org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java

/**
 * Decodes a single CSV line into a {@link BeamRecord} of the given type.
 *
 * <p>Only the first CSV record found in {@code line} is used. Each raw field is converted
 * with {@code autoCastField} according to the field type at the same index.
 *
 * @param csvFormat         the CSV format used to parse the line
 * @param line              the CSV text to decode
 * @param beamRecordSqlType the record type describing the expected fields
 * @return a record holding the converted field values
 * @throws IllegalArgumentException if the line contains no CSV record, if the number of
 *         fields differs from the record type's field count, or if parsing fails with an
 *         {@link IOException}
 */
public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line,
        BeamRecordSqlType beamRecordSqlType) {
    int fieldCount = beamRecordSqlType.getFieldCount();
    List<Object> fieldsValue = new ArrayList<>(fieldCount);
    try (StringReader reader = new StringReader(line); CSVParser parser = csvFormat.parse(reader)) {
        List<CSVRecord> records = parser.getRecords();
        if (records.isEmpty()) {
            // An empty line yields no record; fail with a descriptive error instead of an
            // IndexOutOfBoundsException from get(0).
            throw new IllegalArgumentException(
                    String.format("Expect %d fields, but the line contains no CSV record", fieldCount));
        }

        CSVRecord rawRecord = records.get(0);
        if (rawRecord.size() != fieldCount) {
            throw new IllegalArgumentException(
                    String.format("Expect %d fields, but actually %d", fieldCount, rawRecord.size()));
        }

        for (int idx = 0; idx < fieldCount; idx++) {
            String raw = rawRecord.get(idx);
            fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}