Example usage for org.apache.commons.csv CSVParser getRecords

List of usage examples for org.apache.commons.csv CSVParser getRecords

Introduction

In this page you can find the example usage for org.apache.commons.csv CSVParser getRecords.

Prototype

public List<CSVRecord> getRecords() throws IOException 

Source Link

Document

Parses the CSV input according to the given format and returns the content as a list of CSVRecords.

Usage

From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationDetectionTest.java

/**
 * Verifies the contents of the violation detection output file.
 *
 * <p>Every row of the CSV file must report a GSI hash-key or range-key violation (size or type)
 * whose description matches the expected text. The rows are then reconciled against each
 * non-null list of expected items: every expected item must appear in the file, and no row of
 * that category may be left unmatched.
 *
 * @param detectorOpFile path of the violation detection output CSV file
 * @param tableHashKey name of the table hash key attribute in the expected items
 * @param tableRangeKey name of the table range key attribute, or {@code null} to skip the range
 *        key comparison
 * @param tableHashKeyType not used by this check
 * @param tableRangeKeyType not used by this check
 * @param violatedValueExists whether the violating GSI key values are read from the file and
 *        compared with the expected items
 * @param hashKeySizeViolations expected items with a GSI hash key size violation, or
 *        {@code null} to skip this category
 * @param rangeKeySizeViolations expected items with a GSI range key size violation, or
 *        {@code null} to skip this category
 * @param hashKeyTypeViolations expected items with a GSI hash key type violation, or
 *        {@code null} to skip this category
 * @param rangeKeyTypeViolations expected items with a GSI range key type violation, or
 *        {@code null} to skip this category
 * @param violatedSize size expected after "Found:" in size violation descriptions
 * @param gsiHashKeyName attribute name of the GSI hash key in the expected items
 * @param gsiHashKeyType GSI hash key type expected in type violation descriptions
 * @param gsiRangeKeyName attribute name of the GSI range key in the expected items
 * @param gsiRangeKeyType GSI range key type expected in type violation descriptions
 * @throws IOException if the output file cannot be opened or read
 */
private void checkViolationFileOutput(String detectorOpFile, String tableHashKey, String tableRangeKey,
        String tableHashKeyType, String tableRangeKeyType, boolean violatedValueExists,
        List<Map<String, AttributeValue>> hashKeySizeViolations,
        List<Map<String, AttributeValue>> rangeKeySizeViolations,
        List<Map<String, AttributeValue>> hashKeyTypeViolations,
        List<Map<String, AttributeValue>> rangeKeyTypeViolations, int violatedSize, String gsiHashKeyName,
        String gsiHashKeyType, String gsiRangeKeyName, String gsiRangeKeyType) throws IOException {

    // Table hash key -> table range key, one map per violation category found in the file.
    Map<String, String> hashToRangeHashSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToRangeRangeSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToRangeHashTypeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToRangeRangeTypeViolationsMap = new HashMap<String, String>();

    // Table hash key -> violating GSI key value, only filled when violatedValueExists is set.
    Map<String, String> hashToGsiHashSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToGsiRangeSizeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToGsiHashTypeViolationsMap = new HashMap<String, String>();
    Map<String, String> hashToGsiRangeTypeViolationsMap = new HashMap<String, String>();

    BufferedReader br = null;
    CSVParser parser = null;
    try {
        br = new BufferedReader(new FileReader(new File(detectorOpFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);

        for (CSVRecord csvRecord : parser.getRecords()) {
            String hashKey = csvRecord.get(ViolationRecord.TABLE_HASH_KEY);
            String rangeKey = csvRecord.get(ViolationRecord.TABLE_RANGE_KEY);
            String gsiHashKeyValue = null;
            if (violatedValueExists) {
                gsiHashKeyValue = csvRecord.get(ViolationRecord.GSI_HASH_KEY);
            }
            String hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE);
            String hashKeyViolationDesc = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_DESC);
            String gsiRangeKeyValue = null;
            if (violatedValueExists) {
                gsiRangeKeyValue = csvRecord.get(ViolationRecord.GSI_RANGE_KEY);
            }
            String rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE);
            String rangeKeyViolationDesc = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_DESC);

            boolean foundViolation = false;

            // GSI hash key columns of this row.
            if (hashKeyViolationType.equals("Size Violation")) {
                foundViolation = true;
                hashToRangeHashSizeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiHashSizeViolationsMap.put(hashKey, gsiHashKeyValue);
                }
                Assert.assertEquals("Gsi hash key size violation description is incorrect",
                        "Max Bytes Allowed: " + TestUtils.MAX_HASH_KEY_SIZE + " Found: " + violatedSize,
                        hashKeyViolationDesc);
            } else if (hashKeyViolationType.equals("Type Violation")) {
                foundViolation = true;
                hashToRangeHashTypeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiHashTypeViolationsMap.put(hashKey, gsiHashKeyValue);
                }
                Assert.assertEquals("Gsi hash key type violation description is incorrect",
                        "Expected: " + gsiHashKeyType + " Found: "
                                + TestUtils.returnDifferentAttributeType(gsiHashKeyType),
                        hashKeyViolationDesc);
            } else {
                Assert.assertEquals("Hash key violation description exists even when there is no violation type",
                        "", hashKeyViolationDesc);
            }

            // GSI range key columns of this row.
            if (rangeKeyViolationType.equals("Size Violation")) {
                foundViolation = true;
                hashToRangeRangeSizeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiRangeSizeViolationsMap.put(hashKey, gsiRangeKeyValue);
                }
                Assert.assertEquals("GSI range key size violation description is incorrect",
                        "Max Bytes Allowed: " + TestUtils.MAX_RANGE_KEY_SIZE + " Found: " + violatedSize,
                        rangeKeyViolationDesc);
            } else if (rangeKeyViolationType.equals("Type Violation")) {
                foundViolation = true;
                hashToRangeRangeTypeViolationsMap.put(hashKey, rangeKey);
                if (violatedValueExists) {
                    hashToGsiRangeTypeViolationsMap.put(hashKey, gsiRangeKeyValue);
                }
                Assert.assertEquals("Gsi range key type violation description is incorrect",
                        "Expected: " + gsiRangeKeyType + " Found: "
                                + TestUtils.returnDifferentAttributeType(gsiRangeKeyType),
                        rangeKeyViolationDesc);
            } else {
                Assert.assertEquals("Range key violation description exists even when there is no violation type",
                        "", rangeKeyViolationDesc);
            }

            Assert.assertTrue("No violation found in a row!", foundViolation);
        }

        assertViolationsMatch(hashKeySizeViolations, "Size", "hash", tableHashKey, tableRangeKey,
                gsiHashKeyName, violatedValueExists, hashToRangeHashSizeViolationsMap,
                hashToGsiHashSizeViolationsMap);
        assertViolationsMatch(rangeKeySizeViolations, "Size", "range", tableHashKey, tableRangeKey,
                gsiRangeKeyName, violatedValueExists, hashToRangeRangeSizeViolationsMap,
                hashToGsiRangeSizeViolationsMap);
        assertViolationsMatch(hashKeyTypeViolations, "Type", "hash", tableHashKey, tableRangeKey,
                gsiHashKeyName, violatedValueExists, hashToRangeHashTypeViolationsMap,
                hashToGsiHashTypeViolationsMap);
        assertViolationsMatch(rangeKeyTypeViolations, "Type", "range", tableHashKey, tableRangeKey,
                gsiRangeKeyName, violatedValueExists, hashToRangeRangeTypeViolationsMap,
                hashToGsiRangeTypeViolationsMap);

    } finally {
        // Either reference is still null when opening the file failed; guarding the calls keeps
        // a NullPointerException from replacing the original exception.
        if (br != null) {
            br.close();
        }
        if (parser != null) {
            parser.close();
        }
    }
}

/**
 * Reconciles one category of violations read from the output file with the expected items.
 *
 * <p>Each expected item must have an entry in {@code hashToRangeMap}; matched entries are
 * removed from both maps, and both maps must be empty once all items are processed.
 *
 * @param expectedItems expected violating items, or {@code null} to skip the category
 * @param violation capitalised violation kind used in the messages, "Size" or "Type"
 * @param keyKind GSI key kind used in the messages, "hash" or "range"
 * @param tableHashKey name of the table hash key attribute
 * @param tableRangeKey name of the table range key attribute, or {@code null} to skip it
 * @param gsiKeyName attribute name of the violating GSI key
 * @param violatedValueExists whether the violating GSI values are compared as well
 * @param hashToRangeMap table hash key to table range key, as read from the file
 * @param hashToGsiValueMap table hash key to violating GSI value, as read from the file
 */
private void assertViolationsMatch(List<Map<String, AttributeValue>> expectedItems, String violation,
        String keyKind, String tableHashKey, String tableRangeKey, String gsiKeyName,
        boolean violatedValueExists, Map<String, String> hashToRangeMap,
        Map<String, String> hashToGsiValueMap) {
    if (expectedItems == null) {
        return;
    }

    // Character.toLowerCase is locale-independent, unlike String.toLowerCase().
    String violationLower = Character.toLowerCase(violation.charAt(0)) + violation.substring(1);

    for (Map<String, AttributeValue> item : expectedItems) {
        String expectedTableHashKey = AttributeValueConverter.toBlankString(item.get(tableHashKey));

        if (hashToRangeMap.containsKey(expectedTableHashKey)) {
            if (tableRangeKey != null) {
                String expectedTableRangeKey = AttributeValueConverter.toBlankString(item.get(tableRangeKey));
                Assert.assertEquals(
                        violation + " violated GSI " + keyKind
                                + " key's table's hash key's range key does not match in the output!",
                        expectedTableRangeKey, hashToRangeMap.get(expectedTableHashKey));
            }
            hashToRangeMap.remove(expectedTableHashKey);
        } else {
            Assert.fail("Expected " + violationLower + " violation on " + keyKind + " key not found!");
        }

        // Check for the violating gsi value
        if (violatedValueExists) {
            String expectedGsiValue = AttributeValueConverter.toStringWithAttributeType(item.get(gsiKeyName));
            Assert.assertEquals(violation + " violated Gsi " + keyKind + " value mis-match", expectedGsiValue,
                    hashToGsiValueMap.get(expectedTableHashKey));
            hashToGsiValueMap.remove(expectedTableHashKey);
        }
    }

    Assert.assertEquals("Extra entries found for gsi " + keyKind + " key " + violationLower + " violations", 0,
            hashToRangeMap.size());
    Assert.assertEquals(
            "Extra entries found for gsi " + keyKind + " key " + violationLower + " violation values", 0,
            hashToGsiValueMap.size());
}

From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java

/**
 * Validates the output of violation correction.
 *
 * <p>The correction output CSV must contain exactly as many rows as {@code errorRecords}. For
 * each row, every column whose value equals the value of the error column is dropped, and the
 * remaining values must equal one of the expected records.
 *
 * @param correctionOutputFile path of the correction output CSV file
 * @param errorRecords expected rows without the error column; each matched row is removed from
 *        this list
 * @throws IOException if the output file cannot be opened or read
 */
private void validateCorrectionOutput(String correctionOutputFile, List<List<String>> errorRecords)
        throws IOException {
    BufferedReader br = null;
    CSVParser parser = null;
    try {
        br = new BufferedReader(new FileReader(new File(correctionOutputFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);
        List<CSVRecord> records = parser.getRecords();

        Assert.assertEquals("Error record count does not match", errorRecords.size(), records.size());
        for (CSVRecord record : records) {
            boolean foundError = false;
            List<String> readRecord = new ArrayList<String>();
            for (int i = 0; i < record.size(); i++) {
                // The error column is recognised by value, so any other column holding the same
                // text as the error column is skipped as well.
                if (record.get(i).equals(record.get(ViolationRecord.GSI_VALUE_UPDATE_ERROR))) {
                    foundError = true;
                } else {
                    readRecord.add(record.get(i));
                }
            }
            Assert.assertTrue("Error column not found", foundError);
            // remove() both reports whether the row was expected and consumes it, so the same
            // expected record cannot satisfy two rows.
            Assert.assertTrue("Unexpected record read from correction output",
                    errorRecords.remove(readRecord));
        }
    } finally {
        // Either reference is still null when opening the file failed; guarding the calls keeps
        // a NullPointerException from replacing the original exception.
        if (br != null) {
            br.close();
        }
        if (parser != null) {
            parser.close();
        }
    }
}

From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java

/**
 * Splits a CSV-formatted string into the fields of its first record.
 *
 * <p>The string is parsed with {@link CSVFormat#DEFAULT}; only the first record is used and any
 * further records are ignored.
 *
 * @param val CSV text to split
 * @return the fields of the first record, in order
 * @throws IOException if the parser fails to read the input
 * @throws IndexOutOfBoundsException if {@code val} yields no record at all
 */
public static String[] splitStr(String val) throws IOException {

    // try-with-resources closes the parser even when parsing or get(0) throws.
    try (CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT)) {
        CSVRecord record = parser.getRecords().get(0);
        String[] input = new String[record.size()];
        for (int i = 0; i < input.length; i++) {
            input[i] = record.get(i);
        }
        return input;
    }
}

From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java

/**
 * String Parsing /*from ww w  .  j av a2  s .  c o m*/
 */

public static String[] splitStr(String val, Integer len) throws IOException {

    String[] input;

    try {
        CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT);
        CSVRecord record = parser.getRecords().get(0);
        input = new String[len];
        Iterator<String> valuesIt = record.iterator();
        int i = 0;
        while (valuesIt.hasNext()) {
            input[i] = valuesIt.next().trim();
            i++;
        }
        parser.close();
    } catch (ArrayIndexOutOfBoundsException e) {
        input = val.split(",", len);
        for (int i = 0; i < input.length; i++)
            input[i] = input[i].trim();
    }

    return input;
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java

/**
 * Ingests a CSV stream into {@code dataTable} and writes its rows as tab-separated text to
 * {@code finalOut}, in two passes.
 *
 * <p>Pass one reads the header, creates one {@link DataVariable} per column, copies all records
 * into a temporary file and decides for each column whether all of its values are numeric (and
 * integer), time values or dates. Pass two re-reads the temporary file and writes one
 * tab-separated line per record, formatting each value according to the type chosen for its
 * column.
 *
 * <p>On the success path this method closes both {@code csvReader} and {@code finalOut} and
 * deletes the temporary file.
 *
 * @param csvReader reader over the CSV data; the first record is taken as the header
 * @param dataTable table that receives the variable definitions and the case count
 * @param finalOut writer that receives the tab-separated output
 * @return the number of records read in the second pass
 * @throws IOException if a header name is null or empty, a record is inconsistent with the
 *         header, the two passes disagree on the number of records, or reading/writing fails
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {

    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    Map<String, Integer> headers = parser.getHeaderMap();

    // One DataVariable per header column; i is the column's position in the file.
    int i = 0;
    for (String varName : headers.keySet()) {
        if (varName == null || varName.isEmpty()) {
            // TODO:
            // Add a sensible variable name validation algorithm.
            // -- L.A. 4.0 alpha 1
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
        }

        DataVariable dv = new DataVariable();
        dv.setName(varName);
        dv.setLabel(varName);
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:NOTCALCULATED");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);

        // Every variable starts out as a discrete character variable; columns found to be
        // numeric are re-typed after the first pass.
        dv.setTypeCharacter();
        dv.setIntervalDiscrete();
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
        i++;
    }

    dataTable.setVarQuantity((long) variableList.size());
    dataTable.setDataVariables(variableList);

    // Per-column flags; a flag is cleared as soon as a value contradicts it.
    boolean[] isNumericVariable = new boolean[headers.size()];
    boolean[] isIntegerVariable = new boolean[headers.size()];
    boolean[] isTimeVariable = new boolean[headers.size()];
    boolean[] isDateVariable = new boolean[headers.size()];

    for (i = 0; i < headers.size(); i++) {
        // OK, let's assume that every variable is numeric;
        // but we'll go through the file and examine every value; the
        // moment we find a value that's not a legit numeric one, we'll
        // assume that it is in fact a String.
        isNumericVariable[i] = true;
        isIntegerVariable[i] = true;
        isDateVariable[i] = true;
        isTimeVariable[i] = true;
    }

    // First, "learning" pass.
    // (we'll save the incoming stream in another temp file:)
    SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
    SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];

    // NOTE(review): this file is deleted only at the end of the method; the IOExceptions thrown
    // below leave it behind.
    File firstPassTempFile = File.createTempFile("firstpass-", ".csv");

    try (CSVPrinter csvFilePrinter = new CSVPrinter(
            // TODO allow other parsers of tabular data to use this parser by changin inFormat
            new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) {
        //Write  headers
        csvFilePrinter.printRecord(headers.keySet());
        for (CSVRecord record : parser.getRecords()) {
            // Checks if #records = #columns in header
            if (!record.isConsistent()) {
                // NOTE(review): getRecords() has already been called before this loop body runs,
                // so getCurrentLineNumber() presumably reports the end of the input rather than
                // the offending line - confirm.
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }

            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                // The column stays "integer" only while every value is empty, the literal
                // "null", or one character from firstNumCharSet (defined elsewhere in the class)
                // followed by a remainder accepted by StringUtils.isNumeric.
                isIntegerVariable[i] = isIntegerVariable[i] && varString != null
                        && (varString.isEmpty() || varString.equals("null")
                                || (firstNumCharSet.contains(varString.charAt(0))
                                        && StringUtils.isNumeric(varString.substring(1))));
                if (isNumericVariable[i]) {
                    // If variable might be "numeric" test to see if this value is a parsable number:
                    if (varString != null && !varString.isEmpty()) {

                        // NOTE(review): isNumeric and isInteger are never read.
                        boolean isNumeric = false;
                        boolean isInteger = false;

                        // The "continue" statements below move on to the next column, skipping
                        // the date/time checks for this value.
                        if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                            continue;
                        } else {
                            try {
                                // Constructed only to test whether the token parses; the value
                                // itself is discarded.
                                Double testDoubleValue = new Double(varString);
                                continue;
                            } catch (NumberFormatException ex) {
                                // the token failed to parse as a double
                                // so the column is a string variable.
                            }
                        }
                        isNumericVariable[i] = false;
                    }
                }

                // If this is not a numeric column, see if it is a date collumn
                // by parsing the cell as a date or date-time value:
                if (!isNumericVariable[i]) {

                    Date dateResult = null;

                    if (isTimeVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isTime = false;

                            if (selectedDateTimeFormat[i] != null) {
                                // A format was already chosen for this column; the value must
                                // parse completely with that same format.
                                ParsePosition pos = new ParsePosition(0);
                                dateResult = selectedDateTimeFormat[i].parse(varString, pos);

                                if (dateResult != null && pos.getIndex() == varString.length()) {
                                    // OK, successfully parsed a value!
                                    isTime = true;
                                }
                            } else {
                                // No format chosen yet: pick the first one that consumes the
                                // whole value.
                                for (SimpleDateFormat format : TIME_FORMATS) {
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = format.parse(varString, pos);
                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                        selectedDateTimeFormat[i] = format;
                                        break;
                                    }
                                }
                            }
                            if (!isTime) {
                                isTimeVariable[i] = false;
                                // if the token didn't parse as a time value,
                                // we will still try to parse it as a date, below.
                                // unless this column is NOT a date.
                            } else {
                                // And if it is a time value, we are going to assume it's
                                // NOT a date.
                                isDateVariable[i] = false;
                            }
                        }
                    }

                    if (isDateVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isDate = false;

                            // TODO:
                            // Strictly speaking, we should be doing the same thing
                            // here as with the time formats above; select the
                            // first one that works, then insist that all the
                            // other values in this column match it... but we
                            // only have one, as of now, so it should be ok.
                            // -- L.A. 4.0 beta
                            for (SimpleDateFormat format : DATE_FORMATS) {
                                // Strict parsing - it will throw an
                                // exception if it doesn't parse!
                                format.setLenient(false);
                                try {
                                    format.parse(varString);
                                    isDate = true;
                                    selectedDateFormat[i] = format;
                                    break;
                                } catch (ParseException ex) {
                                    //Do nothing
                                }
                            }
                            isDateVariable[i] = isDate;
                        }
                    }
                }
            }

            // Copy the record to the temp file that the second pass reads.
            csvFilePrinter.printRecord(record);
        }
    }
    // Record count of the first pass; compared with the second pass at the end of the method.
    dataTable.setCaseQuantity(parser.getRecordNumber());
    parser.close();
    csvReader.close();

    // Re-type the variables that we've determined are numerics:
    for (i = 0; i < headers.size(); i++) {
        if (isNumericVariable[i]) {
            dataTable.getDataVariables().get(i).setTypeNumeric();

            if (isIntegerVariable[i]) {
                dataTable.getDataVariables().get(i).setIntervalDiscrete();
            } else {
                dataTable.getDataVariables().get(i).setIntervalContinuous();
            }
        } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
            // Dates are still Strings, i.e., they are "character" and "discrete";
            // But we add special format values for them:
            // NOTE(review): uses DATE_FORMATS[0] rather than selectedDateFormat[i]; equivalent
            // only while DATE_FORMATS holds a single format - confirm.
            dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("date");
        } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
            // Same for time values:
            dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("time");
        }
    }
    // Second, final pass.
    try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) {
        parser = new CSVParser(secondPassReader, inFormat.withHeader());
        // Reused for every record; each slot is overwritten before the row is printed.
        String[] caseRow = new String[headers.size()];

        for (CSVRecord record : parser) {
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }

            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                if (isNumericVariable[i]) {
                    if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                        // Missing value - represented as an empty string in
                        // the final tab file
                        caseRow[i] = "";
                    } else if (varString.equalsIgnoreCase("NaN")) {
                        // "Not a Number" special value:
                        caseRow[i] = "NaN";
                    } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                        // Positive infinity:
                        caseRow[i] = "Inf";
                    } else if (varString.equalsIgnoreCase("-Inf")) {
                        // Negative infinity:
                        caseRow[i] = "-Inf";
                    } else if (varString.equalsIgnoreCase("null")) {
                        // By request from Gus - "NULL" is recognized as a
                        // numeric zero:
                        caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                    } else {
                        /* No re-formatting is done on any other numeric values.
                         * We'll save them as they were, for archival purposes.
                         * The alternative solution - formatting in sci. notation
                         * is commented-out below.
                         */
                        caseRow[i] = varString;
                        /*
                         if (isIntegerVariable[i]) {
                        try {
                            Integer testIntegerValue = new Integer(varString);
                            caseRow[i] = testIntegerValue.toString();
                        } catch (NumberFormatException ex) {
                            throw new IOException("Failed to parse a value recognized as an integer in the first pass! (?)");
                        }
                        } else {
                        try {
                            Double testDoubleValue = new Double(varString);
                            if (testDoubleValue.equals(0.0)) {
                                caseRow[i] = "0.0";
                            } else {
                                // One possible implementation:
                                //
                                // Round our fractional values to 15 digits
                                // (minimum number of digits of precision guaranteed by
                                // type Double) and format the resulting representations
                                // in a IEEE 754-like "scientific notation" - for ex.,
                                // 753.24 will be encoded as 7.5324e2
                                BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext);
                                caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal);

                                // Strip meaningless zeros and extra + signs:
                                caseRow[i] = caseRow[i].replaceFirst("00*e", "e");
                                caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e");
                                caseRow[i] = caseRow[i].replaceFirst("e\\+00", "");
                                caseRow[i] = caseRow[i].replaceFirst("^\\+", "");
                            }
                        } catch (NumberFormatException ex) {
                            throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)");
                        }
                        }
                         */
                    }
                } else if (isTimeVariable[i] || isDateVariable[i]) {
                    // Time and Dates are stored NOT quoted (don't ask).
                    if (varString != null) {
                        // Dealing with quotes:
                        // remove the leading and trailing quotes, if present:
                        varString = varString.replaceFirst("^\"*", "");
                        varString = varString.replaceFirst("\"*$", "");
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "";
                    }
                } else {
                    // Treat as a String:
                    // Strings are stored in tab files quoted;
                    // Missing values are stored as an empty string
                    // between two tabs (or one tab and the new line);
                    // Empty strings stored as "" (quoted empty string).
                    // For the purposes  of this CSV ingest reader, we are going
                    // to assume that all the empty strings in the file are
                    // indeed empty strings, and NOT missing values:
                    if (varString != null) {
                        // escape the quotes, newlines, and tabs:
                        varString = varString.replace("\"", "\\\"");
                        varString = varString.replace("\n", "\\n");
                        varString = varString.replace("\t", "\\t");
                        // final pair of quotes:
                        varString = "\"" + varString + "\"";
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "\"\"";
                    }
                }
            }
            finalOut.println(StringUtils.join(caseRow, "\t"));
        }
    }
    // Number of records read in the second pass.
    long linecount = parser.getRecordNumber();
    finalOut.close();
    parser.close();
    dbglog.fine("Tmp File: " + firstPassTempFile);
    // Firstpass file is deleted to prevent tmp from filling up.
    firstPassTempFile.delete();
    // Both passes must have seen the same number of records.
    if (dataTable.getCaseQuantity().intValue() != linecount) {
        List<String> args = Arrays
                .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount });
        throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
    }
    return (int) linecount;
}

From source file:net.tradelib.core.Series.java

/**
 * Builds a {@code Series} from a CSV file whose first column is a timestamp and whose
 * remaining columns are numeric values.
 *
 * <p>The file is parsed with the default CSV format; {@code #} is the comment marker and
 * spaces surrounding values are ignored.
 *
 * @param path   path of the CSV file to read
 * @param header whether the file starts with a header row; if so, the header names of the
 *               value columns (every column but the first) are applied to the series
 * @param dtf    formatter for the first column; when {@code null}, {@code ISO_DATE} is used
 *               if {@code lt} is given and {@code ISO_LOCAL_DATE_TIME} otherwise
 * @param lt     when non-null, the first column is parsed as a date and combined with this
 *               time; when {@code null}, the first column is parsed as a full date-time
 * @return the populated series, or {@code null} if the file contains no data records
 * @throws Exception if the file cannot be read or a field cannot be parsed
 */
static public Series fromCsv(String path, boolean header, DateTimeFormatter dtf, LocalTime lt)
        throws Exception {

    if (dtf == null) {
        dtf = (lt == null) ? DateTimeFormatter.ISO_LOCAL_DATE_TIME : DateTimeFormatter.ISO_DATE;
    }

    // Parse and import the csv
    CSVFormat csvFmt = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreSurroundingSpaces();
    if (header) {
        csvFmt = csvFmt.withHeader();
    }

    Series result = null;

    // try-with-resources guarantees the file handle is released even when parsing fails
    try (BufferedReader reader = new BufferedReader(new FileReader(path));
            CSVParser csv = csvFmt.parse(reader)) {

        int ncols = -1;
        double[] values = null;

        for (CSVRecord rec : csv.getRecords()) {
            if (result == null) {
                // The first record fixes the number of value columns for the whole series
                ncols = rec.size() - 1;
                values = new double[ncols];
                result = new Series(ncols);
            }

            for (int ii = 0; ii < ncols; ++ii) {
                values[ii] = Double.parseDouble(rec.get(ii + 1));
            }

            LocalDateTime ldt;
            if (lt != null) {
                ldt = LocalDate.parse(rec.get(0), dtf).atTime(lt);
            } else {
                ldt = LocalDateTime.parse(rec.get(0), dtf);
            }

            result.append(ldt, values);
        }

        // result is still null when the file had no data records; there is nothing to name then
        if (header && result != null) {
            Map<String, Integer> headerMap = csv.getHeaderMap();
            result.clearNames();
            for (Map.Entry<String, Integer> me : headerMap.entrySet()) {
                // Column 0 is the timestamp, so value column names are shifted down by one
                if (me.getValue() > 0) {
                    result.setName(me.getKey(), me.getValue() - 1);
                }
            }
        }
    }

    return result;
}

From source file:nl.utwente.trafficanalyzer.GeoTagger.java

/**
 * Reads all records of a CSV file using {@code CSVFormat.DEFAULT}.
 *
 * <p>Failures are reported on standard output (message plus stack trace) rather than
 * propagated to the caller.
 *
 * @param fileName the CSV file to read
 * @return the list of parsed records, or {@code null} if the file could not be read or parsed
 */
public static List readCsvFile(File fileName) {

    FileReader fileReader = null;
    CSVParser csvFileParser = null;

    //Use the default CSV format (no header mapping is configured)
    CSVFormat csvFileFormat = CSVFormat.DEFAULT;

    try {

        //initialize FileReader object
        fileReader = new FileReader(fileName);

        //initialize CSVParser object
        csvFileParser = new CSVParser(fileReader, csvFileFormat);

        //Get a list of CSV file records
        List csvRecords = csvFileParser.getRecords();

        return csvRecords;
    } catch (Exception e) {
        System.out.println("Error in CsvFileReader !!!");
        e.printStackTrace();

    } finally {
        // Either resource is still null if its construction failed (e.g. the file does not
        // exist), so each one is checked before closing. The parser is closed before the
        // reader it wraps, and the reader is closed even if closing the parser fails.
        try {
            try {
                if (csvFileParser != null) {
                    csvFileParser.close();
                }
            } finally {
                if (fileReader != null) {
                    fileReader.close();
                }
            }
        } catch (IOException e) {
            System.out.println("Error while closing fileReader/csvFileParser !!!");
            e.printStackTrace();
        }
    }
    return null;

}

From source file:no.uio.medicine.virsurveillance.parsers.CSVsGBDdata.java

/**
 * Imports records from every {@code .zip} archive located directly inside a folder.
 *
 * <p>Each entry of each archive is parsed as a UTF-8, comma-delimited CSV file with a header
 * row. Only rows whose {@code age_group_id} equals 22 (all ages) are considered, and a row is
 * handed to {@code sqlM.addSanitaryIssueToCountry} only when location, year, sex, cause, mean
 * and metric/unit could all be read from it. SQL failures for a single row are logged and do
 * not stop the import. If {@code deathFolder} is not a directory, a message is printed and
 * nothing is imported.
 *
 * @param deathFolder path of the folder containing the zip archives
 * @throws IOException if the folder cannot be listed or an archive cannot be read or parsed
 */
public void parse(String deathFolder) throws IOException {
    File f = new File(deathFolder);
    Runtime runtime = Runtime.getRuntime();

    if (!f.isDirectory()) {
        System.out.println("Not a directory");
        return;
    }

    String[] filesInDir = f.list();
    if (filesInDir == null) {
        // File.list() returns null on an I/O error; report it instead of failing with an NPE
        throw new IOException("Could not list the contents of " + deathFolder);
    }

    for (String fil : filesInDir) {
        if (!fil.endsWith(".zip")) {
            continue;
        }

        // try-with-resources releases the archive even if one of its entries fails to parse
        try (ZipFile zipFile = new ZipFile(deathFolder + "/" + fil)) {
            Enumeration<? extends ZipEntry> entries = zipFile.entries();

            while (entries.hasMoreElements()) {
                System.out.println(
                        "Used memory: " + (runtime.totalMemory() - runtime.freeMemory()) / (1024 * 1024)
                                + " Free memory: " + (runtime.freeMemory()) / (1024 * 1024));

                ZipEntry entry = entries.nextElement();

                try (InputStream stream = zipFile.getInputStream(entry);
                        BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
                        CSVParser parser = CSVFormat.RFC4180.withDelimiter(',').withIgnoreEmptyLines()
                                .withHeader().parse(br)) {

                    List<CSVRecord> records = parser.getRecords();
                    System.out.println("Reading records: " + zipFile.getName() + "/" + entry);

                    for (CSVRecord csvRecord : records) {
                        // age group 22 corresponds to all ages
                        if (!csvRecord.isMapped("age_group_id")
                                || !csvRecord.get("age_group_id").equalsIgnoreCase("22")) {
                            continue;
                        }

                        String location = null;
                        String year = null;
                        String sex = null;
                        String cause = null;
                        String number = null;
                        String metric = null;

                        if (csvRecord.isMapped("location_code")) {
                            location = csvRecord.get("location_code");
                        }
                        if (csvRecord.isMapped("year")) {
                            year = csvRecord.get("year");
                        }
                        if (csvRecord.isMapped("sex_id")) { //1=male, 2 = female
                            String sexId = csvRecord.get("sex_id");
                            if (sexId.equalsIgnoreCase("1")) {
                                sex = "m";
                            } else if (sexId.equalsIgnoreCase("2")) {
                                sex = "f";
                            }
                        }
                        if (csvRecord.isMapped("cause_name")) {
                            cause = csvRecord.get("cause_name");
                        }
                        if (csvRecord.isMapped("mean")) {
                            number = csvRecord.get("mean");
                        }
                        if (csvRecord.isMapped("metric") && csvRecord.isMapped("unit")) {
                            metric = csvRecord.get("metric") + "-" + csvRecord.get("unit");
                        }

                        // Rows missing any of the required fields are silently skipped
                        if (location == null || year == null || sex == null || cause == null
                                || number == null || metric == null) {
                            continue;
                        }

                        try {
                            sqlM.addSanitaryIssueToCountry(location, year, sex, cause, metric, number);
                        } catch (SQLException ex) {
                            // A failed insert must not abort the rest of the import
                            Logger.getLogger(CSVsGBDdata.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            }
        }
    }
}

From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java

/**
 * Serializes a collection of three resources without any column information and checks
 * that the CSV output parses back into three records whose fields are all values of the
 * corresponding source resource. The second and third resources carry values containing
 * a single quote and a comma respectively.
 */
@Test
public void testSerializeResources_NoColumnInfo() throws Exception {
    Result result = new ResultImpl(true);
    result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK));
    TreeNode<Resource> tree = result.getResultTree();

    // Plain values
    TreeMap<String, Object> firstResource = new TreeMap<String, Object>();
    firstResource.put("property1", "value1a");
    firstResource.put("property2", "value2a");
    firstResource.put("property3", "value3a");
    firstResource.put("property4", "value4a");

    // Values containing a single quote
    TreeMap<String, Object> secondResource = new TreeMap<String, Object>();
    secondResource.put("property1", "value1'b");
    secondResource.put("property2", "value2'b");
    secondResource.put("property3", "value3'b");
    secondResource.put("property4", "value4'b");

    // Values containing the CSV delimiter
    TreeMap<String, Object> thirdResource = new TreeMap<String, Object>();
    thirdResource.put("property1", "value1,c");
    thirdResource.put("property2", "value2,c");
    thirdResource.put("property3", "value3,c");
    thirdResource.put("property4", "value4,c");

    List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>();
    data.add(firstResource);
    data.add(secondResource);
    data.add(thirdResource);

    tree.setName("items");
    tree.setProperty("isCollection", "true");

    for (int idx = 0; idx < data.size(); idx++) {
        addChildResource(tree, "resource", idx, data.get(idx));
    }

    replayAll();

    // Run the serializer and drop carriage returns from its output
    String serialized = new CsvSerializer().serialize(result).toString();
    String csvText = serialized.replace("\r", "");

    verifyAll();

    assertNotNull(csvText);

    CSVParser csvParser = new CSVParser(new StringReader(csvText), CSVFormat.DEFAULT);
    List<CSVRecord> records = csvParser.getRecords();

    assertNotNull(records);
    assertEquals(3, records.size());

    // Each parsed record must line up, in order, with the resource it was produced from
    for (int row = 0; row < records.size(); row++) {
        CSVRecord record = records.get(row);
        TreeMap<String, Object> expected = data.get(row);
        assertEquals(expected.size(), record.size());

        for (String field : record) {
            assertTrue(expected.containsValue(field));
        }
    }

    csvParser.close();
}

From source file:org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java

/**
 * Decodes a single CSV-formatted line into a {@code BeamRecord}.
 *
 * <p>Only the first record parsed from {@code line} is used. Each of its fields is converted
 * with {@code autoCastField} according to the field type at the same index in
 * {@code beamRecordSqlType}.
 *
 * @param csvFormat         the CSV format used to parse the line
 * @param line              the text holding one CSV record
 * @param beamRecordSqlType the row type describing the expected fields
 * @return a record of type {@code beamRecordSqlType} holding the converted field values
 * @throws IllegalArgumentException if the line contains no record, if the number of fields
 *         differs from the row type, or if the line cannot be read
 */
public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line,
        BeamRecordSqlType beamRecordSqlType) {
    List<Object> fieldsValue = new ArrayList<>(beamRecordSqlType.getFieldCount());
    try (StringReader reader = new StringReader(line);
            CSVParser parser = csvFormat.parse(reader)) {
        List<CSVRecord> rawRecords = parser.getRecords();
        if (rawRecords.isEmpty()) {
            // e.g. an empty line: fail with a descriptive error instead of an
            // IndexOutOfBoundsException from get(0)
            throw new IllegalArgumentException(String.format(
                    "Expect %d fields, but the line contains no CSV record",
                    beamRecordSqlType.getFieldCount()));
        }
        CSVRecord rawRecord = rawRecords.get(0);

        if (rawRecord.size() != beamRecordSqlType.getFieldCount()) {
            throw new IllegalArgumentException(String.format("Expect %d fields, but actually %d",
                    beamRecordSqlType.getFieldCount(), rawRecord.size()));
        } else {
            for (int idx = 0; idx < beamRecordSqlType.getFieldCount(); idx++) {
                String raw = rawRecord.get(idx);
                fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
            }
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}