List of usage examples for org.apache.commons.csv CSVParser getRecords
public List<CSVRecord> getRecords() throws IOException
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationDetectionTest.java
/** * Verifies the contents of the violation detection output file *///w w w . j a va 2 s. com private void checkViolationFileOutput(String detectorOpFile, String tableHashKey, String tableRangeKey, String tableHashKeyType, String tableRangeKeyType, boolean violatedValueExists, List<Map<String, AttributeValue>> hashKeySizeViolations, List<Map<String, AttributeValue>> rangeKeySizeViolations, List<Map<String, AttributeValue>> hashKeyTypeViolations, List<Map<String, AttributeValue>> rangeKeyTypeViolations, int violatedSize, String gsiHashKeyName, String gsiHashKeyType, String gsiRangeKeyName, String gsiRangeKeyType) throws IOException { Map<String, String> hashToRangeHashSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToRangeRangeSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToRangeHashTypeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToRangeRangeTypeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiHashSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiRangeSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiHashTypeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiRangeTypeViolationsMap = new HashMap<String, String>(); BufferedReader br = null; CSVParser parser = null; try { br = new BufferedReader(new FileReader(new File(detectorOpFile))); parser = new CSVParser(br, TestUtils.csvFormat); List<CSVRecord> csvRecords = parser.getRecords(); for (CSVRecord csvRecord : csvRecords) { String hashKey = csvRecord.get(ViolationRecord.TABLE_HASH_KEY); String rangeKey = csvRecord.get(ViolationRecord.TABLE_RANGE_KEY); String gsiHashKeyValue = null; if (violatedValueExists) { gsiHashKeyValue = csvRecord.get(ViolationRecord.GSI_HASH_KEY); } String hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE); String hashKeyViolationDesc = 
csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_DESC); String gsiRangeKeyValue = null; if (violatedValueExists) { gsiRangeKeyValue = csvRecord.get(ViolationRecord.GSI_RANGE_KEY); } String rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE); String rangeKeyViolationDesc = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_DESC); boolean foundViolation = false; if (hashKeyViolationType.equals("Size Violation")) { foundViolation = true; hashToRangeHashSizeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { hashToGsiHashSizeViolationsMap.put(hashKey, gsiHashKeyValue); } Assert.assertTrue("Gsi hash key size violation description is incorrect", hashKeyViolationDesc.equals("Max Bytes Allowed: " + TestUtils.MAX_HASH_KEY_SIZE + " Found: " + violatedSize)); } else if (hashKeyViolationType.equals("Type Violation")) { foundViolation = true; hashToRangeHashTypeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { hashToGsiHashTypeViolationsMap.put(hashKey, gsiHashKeyValue); } Assert.assertTrue("Gsi hash key type violation description is incorrect", hashKeyViolationDesc.equals("Expected: " + gsiHashKeyType + " Found: " + TestUtils.returnDifferentAttributeType(gsiHashKeyType))); } else { Assert.assertTrue("Hash key violation description exists even when there is no violation type", hashKeyViolationDesc.equals("")); } if (rangeKeyViolationType.equals("Size Violation")) { foundViolation = true; hashToRangeRangeSizeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { hashToGsiRangeSizeViolationsMap.put(hashKey, gsiRangeKeyValue); } Assert.assertTrue("GSI range key size violation description is incorrect", rangeKeyViolationDesc.equals("Max Bytes Allowed: " + TestUtils.MAX_RANGE_KEY_SIZE + " Found: " + violatedSize)); } else if (rangeKeyViolationType.equals("Type Violation")) { foundViolation = true; hashToRangeRangeTypeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { 
hashToGsiRangeTypeViolationsMap.put(hashKey, gsiRangeKeyValue); } Assert.assertTrue("Gsi range key type violation description is incorrect", rangeKeyViolationDesc.equals("Expected: " + gsiRangeKeyType + " Found: " + TestUtils.returnDifferentAttributeType(gsiRangeKeyType))); } else { Assert.assertTrue("Range key violation description exists even when there is no violation type", rangeKeyViolationDesc.equals("")); } Assert.assertTrue("No violation found in a row!", foundViolation); } if (hashKeySizeViolations != null) { for (Map<String, AttributeValue> item : hashKeySizeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeHashSizeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String expectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Size violated GSI hash key's table's hash key's range key does not match in the output!", expectedTableRangeKey, hashToRangeHashSizeViolationsMap.get(expectedTableHashKey)); } hashToRangeHashSizeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected size violation on hash key not found!"); } // Check for gsi hash value if (violatedValueExists) { AttributeValue gsiHashAttr = item.get(gsiHashKeyName); String expectedGsiHashValue = AttributeValueConverter .toStringWithAttributeType(gsiHashAttr); Assert.assertEquals("Size violated Gsi hash value mis-match", expectedGsiHashValue, hashToGsiHashSizeViolationsMap.get(expectedTableHashKey)); hashToGsiHashSizeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi hash key size violations", 0, hashToRangeHashSizeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi hash key size violation values", 0, hashToGsiHashSizeViolationsMap.size()); } if (rangeKeySizeViolations != 
null) { for (Map<String, AttributeValue> item : rangeKeySizeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeRangeSizeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String expectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Size violated GSI range key's table's hash key's range key does not match in the output!", expectedTableRangeKey, hashToRangeRangeSizeViolationsMap.get(expectedTableHashKey)); } hashToRangeRangeSizeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected size violation on range key not found!"); } // Check for gsi range value if (violatedValueExists) { AttributeValue gsiRangeAttr = item.get(gsiRangeKeyName); String expectedGsiRangeValue = AttributeValueConverter .toStringWithAttributeType(gsiRangeAttr); Assert.assertEquals("Size violated Gsi range value mis-match", expectedGsiRangeValue, hashToGsiRangeSizeViolationsMap.get(expectedTableHashKey)); hashToGsiRangeSizeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi range key size violations", 0, hashToRangeRangeSizeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi range key size violation values", 0, hashToGsiRangeSizeViolationsMap.size()); } if (hashKeyTypeViolations != null) { for (Map<String, AttributeValue> item : hashKeyTypeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeHashTypeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String exptectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Type violated 
GSI hash key's table's hash key's range key does not match in the output!", exptectedTableRangeKey, hashToRangeHashTypeViolationsMap.get(expectedTableHashKey)); } hashToRangeHashTypeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected type violation on hash key not found!"); } // Check for gsi hash value if (violatedValueExists) { AttributeValue gsiHashAttr = item.get(gsiHashKeyName); String expectedGsiHashValue = AttributeValueConverter .toStringWithAttributeType(gsiHashAttr); Assert.assertEquals("Type violated Gsi hash value mis-match", expectedGsiHashValue, hashToGsiHashTypeViolationsMap.get(expectedTableHashKey)); hashToGsiHashTypeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi hash key type violations", 0, hashToRangeHashTypeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi hash key type violation values", 0, hashToGsiHashTypeViolationsMap.size()); } if (rangeKeyTypeViolations != null) { for (Map<String, AttributeValue> item : rangeKeyTypeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeRangeTypeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String exptectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Type violated GSI range key's table's hash key's range key does not match in the output!", exptectedTableRangeKey, hashToRangeRangeTypeViolationsMap.get(expectedTableHashKey)); } hashToRangeRangeTypeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected type violation on range key not found!"); } // Check for gsi range value if (violatedValueExists) { AttributeValue gsiRangeAttr = item.get(gsiRangeKeyName); String expectedGsiRangeValue = AttributeValueConverter 
.toStringWithAttributeType(gsiRangeAttr); Assert.assertEquals("Type violated Gsi range value mis-match", expectedGsiRangeValue, hashToGsiRangeTypeViolationsMap.get(expectedTableHashKey)); hashToGsiRangeTypeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi range key type violations", 0, hashToRangeRangeTypeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi range key type violation values", 0, hashToGsiRangeTypeViolationsMap.size()); } } finally { br.close(); parser.close(); } }
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java
/** * Validates the output of violation correction. *//*from w w w. ja v a 2s . c o m*/ private void validateCorrectionOutput(String correctionOutputFile, List<List<String>> errorRecords) throws IOException { BufferedReader br = null; CSVParser parser = null; try { br = new BufferedReader(new FileReader(new File(correctionOutputFile))); parser = new CSVParser(br, TestUtils.csvFormat); List<CSVRecord> records = parser.getRecords(); Assert.assertEquals("Error record count does not match", errorRecords.size(), records.size()); for (CSVRecord record : records) { boolean foundError = false; List<String> readRecord = new ArrayList<String>(); for (int i = 0; i < record.size(); i++) { if (record.get(i).equals(record.get(ViolationRecord.GSI_VALUE_UPDATE_ERROR))) { foundError = true; continue; } else { readRecord.add(record.get(i)); } } Assert.assertTrue("Error column not found", foundError); Assert.assertTrue("Unexpected record read from correction output", errorRecords.contains(readRecord)); errorRecords.remove(readRecord); } } finally { br.close(); parser.close(); } }
From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java
public static String[] splitStr(String val) throws IOException { CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT); CSVRecord record = parser.getRecords().get(0); Iterator<String> valuesIt = record.iterator(); String[] input = new String[record.size()]; int i = 0;/* w w w . jav a2 s . c o m*/ while (valuesIt.hasNext()) { input[i] = valuesIt.next(); i++; } parser.close(); return input; }
From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java
/** * String Parsing /*from ww w . j av a2 s . c o m*/ */ public static String[] splitStr(String val, Integer len) throws IOException { String[] input; try { CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT); CSVRecord record = parser.getRecords().get(0); input = new String[len]; Iterator<String> valuesIt = record.iterator(); int i = 0; while (valuesIt.hasNext()) { input[i] = valuesIt.next().trim(); i++; } parser.close(); } catch (ArrayIndexOutOfBoundsException e) { input = val.split(",", len); for (int i = 0; i < input.length; i++) input[i] = input[i].trim(); } return input; }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java
/**
 * Ingests a CSV stream into a Dataverse tab-delimited file in two passes.
 *
 * Pass 1 ("learning"): copies the input to a temp CSV while inferring, per column,
 * whether it is numeric, integer, date, or time. Pass 2: re-reads the temp file and
 * writes each value to {@code finalOut} in tab-file form (numbers normalized, dates
 * and times unquoted, strings escaped and quoted).
 *
 * @param csvReader source CSV stream; closed by this method after the first pass
 * @param dataTable metadata target; variable list/types and case count are set here
 * @param finalOut  destination for tab-delimited output; closed by this method
 * @return number of data records written
 * @throws IOException on invalid header, column-count mismatch, or pass-count mismatch
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {
    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    Map<String, Integer> headers = parser.getHeaderMap();
    // Build one DataVariable per header column; every variable starts out as
    // character/discrete and may be re-typed after the learning pass.
    int i = 0;
    for (String varName : headers.keySet()) {
        if (varName == null || varName.isEmpty()) {
            // TODO:
            // Add a sensible variable name validation algorithm.
            // -- L.A. 4.0 alpha 1
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
        }
        DataVariable dv = new DataVariable();
        dv.setName(varName);
        dv.setLabel(varName);
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:NOTCALCULATED");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);
        dv.setTypeCharacter();
        dv.setIntervalDiscrete();
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
        i++;
    }
    dataTable.setVarQuantity((long) variableList.size());
    dataTable.setDataVariables(variableList);
    // Per-column type flags, narrowed as values are examined.
    boolean[] isNumericVariable = new boolean[headers.size()];
    boolean[] isIntegerVariable = new boolean[headers.size()];
    boolean[] isTimeVariable = new boolean[headers.size()];
    boolean[] isDateVariable = new boolean[headers.size()];
    for (i = 0; i < headers.size(); i++) {
        // OK, let's assume that every variable is numeric;
        // but we'll go through the file and examine every value; the
        // moment we find a value that's not a legit numeric one, we'll
        // assume that it is in fact a String.
        isNumericVariable[i] = true;
        isIntegerVariable[i] = true;
        isDateVariable[i] = true;
        isTimeVariable[i] = true;
    }
    // First, "learning" pass.
    // (we'll save the incoming stream in another temp file:)
    SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
    SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];
    File firstPassTempFile = File.createTempFile("firstpass-", ".csv");
    try (CSVPrinter csvFilePrinter = new CSVPrinter(
            // TODO allow other parsers of tabular data to use this parser by changin inFormat
            new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) {
        // Write headers
        csvFilePrinter.printRecord(headers.keySet());
        for (CSVRecord record : parser.getRecords()) {
            // Checks if #records = #columns in header
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }
            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                // Integer-ness: empty, "null", or a leading numeric char followed by digits.
                isIntegerVariable[i] = isIntegerVariable[i] && varString != null && (varString.isEmpty()
                        || varString.equals("null") || (firstNumCharSet.contains(varString.charAt(0))
                                && StringUtils.isNumeric(varString.substring(1))));
                if (isNumericVariable[i]) {
                    // If variable might be "numeric" test to see if this value is a parsable number:
                    if (varString != null && !varString.isEmpty()) {
                        boolean isNumeric = false;
                        boolean isInteger = false;
                        // Special numeric tokens are accepted without parsing.
                        if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                            continue;
                        } else {
                            try {
                                Double testDoubleValue = new Double(varString);
                                continue;
                            } catch (NumberFormatException ex) {
                                // the token failed to parse as a double
                                // so the column is a string variable.
                            }
                        }
                        isNumericVariable[i] = false;
                    }
                }
                // If this is not a numeric column, see if it is a date collumn
                // by parsing the cell as a date or date-time value:
                if (!isNumericVariable[i]) {
                    Date dateResult = null;
                    if (isTimeVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isTime = false;
                            if (selectedDateTimeFormat[i] != null) {
                                // A format was already chosen for this column; the value must match it.
                                ParsePosition pos = new ParsePosition(0);
                                dateResult = selectedDateTimeFormat[i].parse(varString, pos);
                                if (dateResult != null && pos.getIndex() == varString.length()) {
                                    // OK, successfully parsed a value!
                                    isTime = true;
                                }
                            } else {
                                // Try each known time format; lock in the first that fully matches.
                                for (SimpleDateFormat format : TIME_FORMATS) {
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = format.parse(varString, pos);
                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                        selectedDateTimeFormat[i] = format;
                                        break;
                                    }
                                }
                            }
                            if (!isTime) {
                                isTimeVariable[i] = false;
                                // if the token didn't parse as a time value,
                                // we will still try to parse it as a date, below.
                                // unless this column is NOT a date.
                            } else {
                                // And if it is a time value, we are going to assume it's
                                // NOT a date.
                                isDateVariable[i] = false;
                            }
                        }
                    }
                    if (isDateVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isDate = false;
                            // TODO:
                            // Strictly speaking, we should be doing the same thing
                            // here as with the time formats above; select the
                            // first one that works, then insist that all the
                            // other values in this column match it... but we
                            // only have one, as of now, so it should be ok.
                            // -- L.A. 4.0 beta
                            for (SimpleDateFormat format : DATE_FORMATS) {
                                // Strict parsing - it will throw an
                                // exception if it doesn't parse!
                                format.setLenient(false);
                                try {
                                    format.parse(varString);
                                    isDate = true;
                                    selectedDateFormat[i] = format;
                                    break;
                                } catch (ParseException ex) {
                                    // Do nothing
                                }
                            }
                            isDateVariable[i] = isDate;
                        }
                    }
                }
            }
            // Copy the record into the temp file for the second pass.
            csvFilePrinter.printRecord(record);
        }
    }
    dataTable.setCaseQuantity(parser.getRecordNumber());
    parser.close();
    csvReader.close();
    // Re-type the variables that we've determined are numerics:
    for (i = 0; i < headers.size(); i++) {
        if (isNumericVariable[i]) {
            dataTable.getDataVariables().get(i).setTypeNumeric();
            if (isIntegerVariable[i]) {
                dataTable.getDataVariables().get(i).setIntervalDiscrete();
            } else {
                dataTable.getDataVariables().get(i).setIntervalContinuous();
            }
        } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
            // Dates are still Strings, i.e., they are "character" and "discrete";
            // But we add special format values for them:
            dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("date");
        } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
            // Same for time values:
            dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("time");
        }
    }
    // Second, final pass.
    try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) {
        parser = new CSVParser(secondPassReader, inFormat.withHeader());
        String[] caseRow = new String[headers.size()];
        for (CSVRecord record : parser) {
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }
            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                if (isNumericVariable[i]) {
                    if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                        // Missing value - represented as an empty string in
                        // the final tab file
                        caseRow[i] = "";
                    } else if (varString.equalsIgnoreCase("NaN")) {
                        // "Not a Number" special value:
                        caseRow[i] = "NaN";
                    } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                        // Positive infinity:
                        caseRow[i] = "Inf";
                    } else if (varString.equalsIgnoreCase("-Inf")) {
                        // Negative infinity:
                        caseRow[i] = "-Inf";
                    } else if (varString.equalsIgnoreCase("null")) {
                        // By request from Gus - "NULL" is recognized as a
                        // numeric zero:
                        caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                    } else {
                        /* No re-formatting is done on any other numeric values.
                         * We'll save them as they were, for archival purposes.
                         * (A commented-out alternative that re-encoded values in
                         * IEEE 754-style scientific notation was removed here.)
                         */
                        caseRow[i] = varString;
                    }
                } else if (isTimeVariable[i] || isDateVariable[i]) {
                    // Time and Dates are stored NOT quoted (don't ask).
                    if (varString != null) {
                        // Dealing with quotes:
                        // remove the leading and trailing quotes, if present:
                        varString = varString.replaceFirst("^\"*", "");
                        varString = varString.replaceFirst("\"*$", "");
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "";
                    }
                } else {
                    // Treat as a String:
                    // Strings are stored in tab files quoted;
                    // Missing values are stored as an empty string
                    // between two tabs (or one tab and the new line);
                    // Empty strings stored as "" (quoted empty string).
                    // For the purposes of this CSV ingest reader, we are going
                    // to assume that all the empty strings in the file are
                    // indeed empty strings, and NOT missing values:
                    if (varString != null) {
                        // escape the quotes, newlines, and tabs:
                        varString = varString.replace("\"", "\\\"");
                        varString = varString.replace("\n", "\\n");
                        varString = varString.replace("\t", "\\t");
                        // final pair of quotes:
                        varString = "\"" + varString + "\"";
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "\"\"";
                    }
                }
            }
            finalOut.println(StringUtils.join(caseRow, "\t"));
        }
    }
    long linecount = parser.getRecordNumber();
    finalOut.close();
    parser.close();
    dbglog.fine("Tmp File: " + firstPassTempFile);
    // Firstpass file is deleted to prevent tmp from filling up.
    firstPassTempFile.delete();
    // Both passes must have seen the same number of records.
    if (dataTable.getCaseQuantity().intValue() != linecount) {
        List<String> args = Arrays
                .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount });
        throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
    }
    return (int) linecount;
}
From source file:net.tradelib.core.Series.java
static public Series fromCsv(String path, boolean header, DateTimeFormatter dtf, LocalTime lt) throws Exception { if (dtf == null) { if (lt == null) dtf = DateTimeFormatter.ISO_LOCAL_DATE_TIME; else//from ww w.jav a 2 s . co m dtf = DateTimeFormatter.ISO_DATE; } // Parse and import the csv CSVFormat csvFmt = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreSurroundingSpaces(); if (header) csvFmt = csvFmt.withHeader(); CSVParser csv = csvFmt.parse(new BufferedReader(new FileReader(path))); int ncols = -1; Series result = null; double[] values = null; for (CSVRecord rec : csv.getRecords()) { if (result == null) { ncols = rec.size() - 1; values = new double[ncols]; result = new Series(ncols); } for (int ii = 0; ii < ncols; ++ii) { values[ii] = Double.parseDouble(rec.get(ii + 1)); } LocalDateTime ldt; if (lt != null) { ldt = LocalDate.parse(rec.get(0), dtf).atTime(lt); } else { ldt = LocalDateTime.parse(rec.get(0), dtf); } result.append(ldt, values); } if (header) { Map<String, Integer> headerMap = csv.getHeaderMap(); result.clearNames(); for (Map.Entry<String, Integer> me : headerMap.entrySet()) { if (me.getValue() > 0) result.setName(me.getKey(), me.getValue() - 1); } } return result; }
From source file:nl.utwente.trafficanalyzer.GeoTagger.java
public static List readCsvFile(File fileName) { FileReader fileReader = null; CSVParser csvFileParser = null; //Create the CSVFormat object with the header mapping CSVFormat csvFileFormat = CSVFormat.DEFAULT; try {/*from w w w.j a va 2s . co m*/ //initialize FileReader object fileReader = new FileReader(fileName); //initialize CSVParser object csvFileParser = new CSVParser(fileReader, csvFileFormat); //Get a list of CSV file records List csvRecords = csvFileParser.getRecords(); return csvRecords; } catch (Exception e) { System.out.println("Error in CsvFileReader !!!"); e.printStackTrace(); } finally { try { fileReader.close(); csvFileParser.close(); } catch (IOException e) { System.out.println("Error while closing fileReader/csvFileParser !!!"); e.printStackTrace(); } } return null; }
From source file:no.uio.medicine.virsurveillance.parsers.CSVsGBDdata.java
public void parse(String deathFolder) throws IOException { File f = new File(deathFolder); Runtime runtime = Runtime.getRuntime(); if (f.isDirectory()) { String[] filesInDir = f.list(); for (String fil : filesInDir) { if (fil.endsWith(".zip")) { ZipFile zipFile = new ZipFile(deathFolder + "/" + fil); Enumeration<? extends ZipEntry> entries = zipFile.entries(); while (entries.hasMoreElements()) { System.out.println( "Used memory: " + (runtime.totalMemory() - runtime.freeMemory()) / (1024 * 1024) + " Free memory: " + (runtime.freeMemory()) / (1024 * 1024)); ZipEntry entry = entries.nextElement(); InputStream stream = zipFile.getInputStream(entry); BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8")); CSVParser parser = CSVFormat.RFC4180.withDelimiter(',').withIgnoreEmptyLines().withHeader() .parse(br);//from w w w . ja va 2 s. co m List<CSVRecord> records = parser.getRecords(); System.out.println("Reading records: " + zipFile.getName() + "/" + entry); /*for (int i=0;i<records.size();i++) { CSVRecord csvRecord = records.get(i);*/ for (CSVRecord csvRecord : records) { if (csvRecord.isMapped("age_group_id")) { //age group 22 corresponds to all ages if (csvRecord.get("age_group_id").equalsIgnoreCase("22")) { String location = null; String year = null; String sex = null; String cause = null; String number = null; String metric = null; if (csvRecord.isMapped("location_code")) { location = csvRecord.get("location_code"); } if (csvRecord.isMapped("year")) { year = csvRecord.get("year"); } if (csvRecord.isMapped("sex_id")) { //1=male, 2 = female if (csvRecord.get("sex_id").equalsIgnoreCase(("1"))) { sex = "m"; } else if (csvRecord.get("sex_id").equalsIgnoreCase("2")) { sex = "f"; } } if (csvRecord.isMapped("cause_name")) { cause = csvRecord.get("cause_name"); } if (csvRecord.isMapped("mean")) { number = csvRecord.get("mean"); } if (csvRecord.isMapped("metric") && csvRecord.isMapped("unit")) { metric = csvRecord.get("metric") + "-" + 
csvRecord.get("unit"); } if (location != null && year != null && sex != null && cause != null && number != null && metric != null) { try { sqlM.addSanitaryIssueToCountry(location, year, sex, cause, metric, number); } catch (SQLException ex) { Logger.getLogger(CSVsGBDdata.class.getName()).log(Level.SEVERE, null, ex); } } } } } parser.close(); stream.close(); br.close(); } zipFile.close(); } } } else { System.out.println("Not a directory"); } }
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java
@Test public void testSerializeResources_NoColumnInfo() throws Exception { Result result = new ResultImpl(true); result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK)); TreeNode<Resource> tree = result.getResultTree(); List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>() { {/*w w w . java 2 s . c om*/ add(new TreeMap<String, Object>() { { put("property1", "value1a"); put("property2", "value2a"); put("property3", "value3a"); put("property4", "value4a"); } }); add(new TreeMap<String, Object>() { { put("property1", "value1'b"); put("property2", "value2'b"); put("property3", "value3'b"); put("property4", "value4'b"); } }); add(new TreeMap<String, Object>() { { put("property1", "value1,c"); put("property2", "value2,c"); put("property3", "value3,c"); put("property4", "value4,c"); } }); } }; tree.setName("items"); tree.setProperty("isCollection", "true"); addChildResource(tree, "resource", 0, data.get(0)); addChildResource(tree, "resource", 1, data.get(1)); addChildResource(tree, "resource", 2, data.get(2)); replayAll(); //execute test Object o = new CsvSerializer().serialize(result).toString().replace("\r", ""); verifyAll(); assertNotNull(o); StringReader reader = new StringReader(o.toString()); CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT); List<CSVRecord> records = csvParser.getRecords(); assertNotNull(records); assertEquals(3, records.size()); int i = 0; for (CSVRecord record : records) { TreeMap<String, Object> actualData = data.get(i++); assertEquals(actualData.size(), record.size()); for (String item : record) { assertTrue(actualData.containsValue(item)); } } csvParser.close(); }
From source file:org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java
public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line, BeamRecordSqlType beamRecordSqlType) { List<Object> fieldsValue = new ArrayList<>(beamRecordSqlType.getFieldCount()); try (StringReader reader = new StringReader(line)) { CSVParser parser = csvFormat.parse(reader); CSVRecord rawRecord = parser.getRecords().get(0); if (rawRecord.size() != beamRecordSqlType.getFieldCount()) { throw new IllegalArgumentException(String.format("Expect %d fields, but actually %d", beamRecordSqlType.getFieldCount(), rawRecord.size())); } else {/*w w w. j ava 2 s . c o m*/ for (int idx = 0; idx < beamRecordSqlType.getFieldCount(); idx++) { String raw = rawRecord.get(idx); fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw)); } } } catch (IOException e) { throw new IllegalArgumentException("decodeRecord failed!", e); } return new BeamRecord(beamRecordSqlType, fieldsValue); }