List of usage examples for org.apache.commons.csv CSVParser close
@Override public void close() throws IOException
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java
/**
 * Validates the output of violation correction.
 *
 * <p>Parses the given CSV file, asserts that it holds as many records as
 * {@code errorRecords}, and asserts that every parsed record (with its error
 * column stripped) is contained in {@code errorRecords}. Each matched record is
 * removed from {@code errorRecords}, so the list is mutated by this call.
 *
 * @param correctionOutputFile path of the CSV file to validate
 * @param errorRecords expected records, without the error column
 * @throws IOException if the file cannot be opened, parsed or closed
 */
private void validateCorrectionOutput(String correctionOutputFile, List<List<String>> errorRecords)
        throws IOException {
    BufferedReader br = null;
    CSVParser parser = null;
    try {
        br = new BufferedReader(new FileReader(new File(correctionOutputFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);
        List<CSVRecord> records = parser.getRecords();

        Assert.assertEquals("Error record count does not match", errorRecords.size(), records.size());
        for (CSVRecord record : records) {
            boolean foundError = false;
            List<String> readRecord = new ArrayList<String>();
            for (int i = 0; i < record.size(); i++) {
                // The error column is recognised by value, so any field whose value
                // equals the error column's value is dropped as well.
                if (record.get(i).equals(record.get(ViolationRecord.GSI_VALUE_UPDATE_ERROR))) {
                    foundError = true;
                } else {
                    readRecord.add(record.get(i));
                }
            }
            Assert.assertTrue("Error column not found", foundError);
            Assert.assertTrue("Unexpected record read from correction output",
                    errorRecords.contains(readRecord));
            errorRecords.remove(readRecord);
        }
    } finally {
        // Either resource may still be null if opening the file or building the
        // parser failed; guarding avoids an NPE that would mask the real failure.
        try {
            if (parser != null) {
                parser.close();
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }
}
From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
/** * 1. Gets the file header or creates one. * 2. Sets the number of columns/*from ww w .j a v a2s . c o m*/ * 3. Checks the type of data stored in each variable using the first rowsToScan rows. * Variables are integers by default. This method will change the data type to either * double or string. * */ private void setDataTypes() { CSVParser parser = null; Reader reader = null; try { reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); //Get column names from variable attributes colNames = new String[variableAttributeMap.size()]; int index = 0; Iterator<VariableName> iter = variableAttributeMap.keySet().iterator(); VariableName tempName = null; while (iter.hasNext()) { colNames[index++] = iter.next().toString(); } //Create a parser with variable names from the variable attributes if (hasHeader) { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true).withCommentMarker('#')); } else { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withCommentMarker('#')); } //Check data types in each column. String value = ""; Iterator<CSVRecord> csvIter = parser.iterator(); CSVRecord csvRecord = null; double testValue = 0; nrow = 0; while (csvIter.hasNext()) { csvRecord = csvIter.next(); iter = variableAttributeMap.keySet().iterator(); while (iter.hasNext()) { tempName = iter.next(); value = csvRecord.get(tempName.toString()).trim(); //Check that string can be converted to double. If not, Change variable type. 
//Ignore missing data and other special codes try { if (!"".equals(value) && !specialDataCodes.isMissing(value)) { testValue = Double.parseDouble(value); if (testValue != Math.floor(testValue)) { //if any value is a double, the variable is a double variableAttributeMap.get(tempName).setDataType(DataType.DOUBLE); } } } catch (NumberFormatException ex) { //if any value is a String, the variable is a String variableAttributeMap.get(tempName).setDataType(DataType.STRING); } } nrow++; } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); } catch (IOException ex) { theException = ex; logger.fatal(ex); } } }
From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
private void convertFile() { CSVParser parser = null; Reader reader = null;/*from w w w .ja v a 2 s. c o m*/ CSVPrinter printer = null; Writer writer = null; try { if (outputFile.exists()) { if (!overwrite) { theException = new IOException("File already exists and overwrite==false"); return; } } else { outputFile.createNewFile(); } //For debugging // System.out.println("CREATED: " + outputFile.getAbsolutePath()); //Writer header to file writer = new OutputStreamWriter(new FileOutputStream(outputFile)); printer = new CSVPrinter(writer, CSVFormat.DEFAULT.withCommentMarker('#')); printer.printComment("VERSION"); printer.printRecord(new String[] { "jmetrik1" }); printer.printComment("METADATA"); printer.printRecord(new String[] { Integer.valueOf(nrow).toString() }); printer.printComment("ATTRIBUTES"); for (VariableName v : variableAttributeMap.keySet()) { printer.printRecord(variableAttributeMap.get(v).getAttributeArray()); } printer.printComment("DATA"); //Write data to file reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); parser = new CSVParser(reader, dataFileFormat); if (hasHeader) { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true)); } else { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames)); } Iterator<CSVRecord> iter = parser.iterator(); CSVRecord csvRecord = null; VariableAttributes variableAttributes = null; DataType dataType = null; String temp = ""; while (iter.hasNext()) { csvRecord = iter.next(); for (VariableName v : variableAttributeMap.keySet()) { temp = csvRecord.get(v.toString()); variableAttributes = variableAttributeMap.get(v); dataType = variableAttributes.getDataType(); if (!variableAttributes.isMissing(temp)) { if (DataType.INTEGER == dataType) { printer.print(Double.valueOf(Double.parseDouble(temp)).intValue()); } else if (DataType.DOUBLE == dataType) { printer.print(Double.parseDouble(temp)); } else { printer.print(temp); } } else { 
printer.print(temp); } } printer.println(); } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); if (printer != null) printer.close(); if (writer != null) writer.close(); } catch (IOException ex) { theException = ex; logger.fatal(ex); } } }
From source file:com.wx3.galacdecks.Bootstrap.java
/**
 * Imports card prototypes from a CSV file (Excel format, first record is the header).
 *
 * <p>For every record an {@code EntityPrototype} is built from the text columns, the
 * tag columns (a tag is set when its column holds "Y", plus every name listed in the
 * comma-separated "types" column), the stat columns, and the rules, validators and AI
 * hints looked up by id in the respective caches. The prototype is stored in the
 * datastore and registered in {@code cardCache}.
 *
 * @param datastore datastore that receives each created prototype
 * @param path path of the CSV file to read
 * @throws IOException if the file cannot be opened, read or closed
 * @throws RuntimeException if a referenced rule, validator or AI hint is unknown, or
 *         if a card id occurs twice
 */
private void importCards(GameDatastore datastore, String path) throws IOException {
    // try-with-resources: the loop below throws on bad data, and the file must be
    // released on those paths as well.
    try (Reader reader = new FileReader(path);
            CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        int count = 0;
        for (CSVRecord record : parser) {
            String id = record.get("id");
            String name = record.get("name");
            String description = record.get("description");
            String flavor = record.get("flavor");
            String unitPrefab = record.get("unitPrefab");
            String summonEffect = record.get("summonEffect");
            String projectile = record.get("projectile");
            String portrait = record.get("portrait");

            Set<EntityTag> tags = new HashSet<EntityTag>();
            for (EntityTag tag : EntityTag.values()) {
                if (record.isSet(tag.toString())) {
                    if (record.get(tag).equals("Y")) {
                        tags.add(tag);
                    }
                }
            }
            String[] types = record.get("types").split(",");
            for (String type : types) {
                if (type.length() > 0) {
                    EntityTag tag = EntityTag.valueOf(type);
                    tags.add(tag);
                }
            }

            Map<EntityStat, Integer> stats = new HashMap<EntityStat, Integer>();
            for (EntityStat stat : EntityStat.values()) {
                String statName = stat.toString();
                if (record.isSet(statName)) {
                    stats.put(stat, parseIntOrZero(record.get(statName)));
                }
            }

            List<EntityRule> rules = new ArrayList<EntityRule>();
            String ruleField = record.get("rules");
            String[] ruleIds = ruleField.split(",");
            for (String ruleId : ruleIds) {
                if (ruleId.length() > 0) {
                    if (!ruleCache.containsKey(ruleId)) {
                        throw new RuntimeException("Unable to find rule '" + ruleId + "'");
                    }
                    EntityRule rule = ruleCache.get(ruleId);
                    rules.add(rule);
                }
            }

            ValidatorScript playValidator = null;
            String playValidatorId = record.get("playValidator");
            if (playValidatorId != null && !playValidatorId.isEmpty()) {
                if (!playValidatorCache.containsKey(playValidatorId)) {
                    throw new RuntimeException("Unknown validator '" + playValidatorId + "'");
                }
                playValidator = playValidatorCache.get(playValidatorId);
            }

            ValidatorScript discardValidator = null;
            String discardValidatorId = record.get("discardValidator");
            if (discardValidatorId != null && !discardValidatorId.isEmpty()) {
                if (!discardValidatorCache.containsKey(discardValidatorId)) {
                    throw new RuntimeException("Unknown validator '" + discardValidatorId + "'");
                }
                discardValidator = discardValidatorCache.get(discardValidatorId);
            }

            Set<AiHint> aiHints = new HashSet<>();
            String hintField = record.get("aiHints");
            String[] hintIds = hintField.split(",");
            for (String hintId : hintIds) {
                if (hintId.length() > 0) {
                    if (!aiHintCache.containsKey(hintId)) {
                        throw new RuntimeException("Unable to find AI Hint '" + hintId + "'");
                    }
                    AiHint hint = aiHintCache.get(hintId);
                    aiHints.add(hint);
                }
            }

            EntityPrototype card = EntityPrototype.createPrototype(id, name, description, flavor, unitPrefab,
                    summonEffect, projectile, portrait, tags, rules, playValidator, discardValidator, stats,
                    aiHints);
            // NOTE(review): the prototype is stored before the duplicate-id check below,
            // so a duplicate is already in the datastore when the exception is thrown.
            datastore.createPrototype(card);
            if (cardCache.containsKey(card.getId())) {
                throw new RuntimeException("Duplicate card id: " + card.getId());
            }
            cardCache.put(card.getId(), card);
            ++count;
        }
        logger.info("Imported " + count + " cards");
    }
}
From source file:com.teamnx.util.CSVToDateBase.java
/** * @param fileName//from w w w . j a v a2 s. co m * @param type * @return */ public List readCsvFile(String fileName, int type) { FileReader fileReader = null; CSVParser csvFileParser = null; List list = null; //CSVFormatheader mapping CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(file_header); try { //?FileReader object fileReader = new FileReader(fileName); //? CSVParser object csvFileParser = new CSVParser(fileReader, csvFileFormat); //CSVrecords List<CSVRecord> csvRecords = csvFileParser.getRecords(); // CSV switch (type) { case USER: List<User> userList = new ArrayList<User>(); for (int i = 1; i < csvRecords.size(); i++) { CSVRecord record = csvRecords.get(i); //? User user = new User(); user.setId(record.get("id")); user.setName(record.get("name")); user.setPassword(record.get("password")); user.setDepartment_id(Integer.parseInt(record.get("department_id"))); user.setCharacter(Integer.parseInt(record.get("character"))); user.setClass_id(record.get("class_id")); user.setDepartment_name(record.get("department_name")); userList.add(user); } list = userList; break; case DEPARTMENT: List<Department> departmentList = new ArrayList<Department>(); for (int i = 1; i < csvRecords.size(); i++) { CSVRecord record = csvRecords.get(i); //? Department department = new Department(); department.setId(Integer.parseInt(record.get("id"))); department.setName(record.get("name")); departmentList.add(department); } list = departmentList; break; case COURSE: List<Course> courseList = new ArrayList<Course>(); for (int i = 1; i < csvRecords.size(); i++) { CSVRecord record = csvRecords.get(i); //? 
Course course = new Course(); course.setId(record.get("id")); course.setName(record.get("name")); course.setDepartment_id(Integer.parseInt(record.get("department_id"))); course.setStart_time(Integer.parseInt(record.get("start_time"))); course.setEnd_time(Integer.parseInt(record.get("end_time"))); course.setPosition(record.get("position")); course.setSchedule(record.get("schedule")); course.setYear(Integer.parseInt(record.get("year"))); course.setSemester(Integer.parseInt(record.get("semester"))); int j = Integer.parseInt(record.get("category")); course.setCategory(j == 1 ? true : false); course.setMax_member(Integer.parseInt(record.get("max_member"))); courseList.add(course); } list = courseList; break; case STUDENT_COURSE: List<StudentCourse> studentCourseList = new ArrayList<StudentCourse>(); for (int i = 1; i < csvRecords.size(); i++) { CSVRecord record = csvRecords.get(i); StudentCourse studentCourse = new StudentCourse(); studentCourse.setId(record.get("id")); studentCourse.setCourseId(record.get("course_id")); studentCourse.setStudentId(record.get("student_id")); studentCourseList.add(studentCourse); } list = studentCourseList; break; case TEACHER_COURSE: List<TeacherCourse> teacherCourseList = new ArrayList<TeacherCourse>(); for (int i = 1; i < csvRecords.size(); i++) { CSVRecord record = csvRecords.get(i); TeacherCourse teacherCourse = new TeacherCourse(); teacherCourse.setId(record.get("id")); teacherCourse.setTeacherId(record.get("teacher_id")); teacherCourse.setCourseId(record.get("course_id")); teacherCourseList.add(teacherCourse); } list = teacherCourseList; break; } } catch (Exception e) { e.printStackTrace(); } finally { try { fileReader.close(); csvFileParser.close(); } catch (IOException e) { e.printStackTrace(); } finally { return list; } } }
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java
/** * Iterates through detection output file: first leave updates blank based on missing updates per key. * Once it has reached the missing update number, it removes the expected gsi values as per the specified 'missingGsiExpectedHashValues'. * Note that once blank number is reached, it also starts adding updates. * It then iterates over the rows again and adds values for Yes/No/Invalid in the delete column. * It returns all error records, if present. If not, it returns all records. */// w ww . j av a 2 s . c o m private static List<List<String>> createCorrectionFile(final String detectionFile, final String correctionFile, final String gsiHashKeyName, final String gsiHashKeyType, final String gsiRangeKeyName, final String gsiRangeKeyType, final Map<String, String> tableHashToNewGsiHashValueMap, final Map<String, String> tableHashToNewGsiRangeValueMap, final int missingUpdatesPerKey, final int missingGsiExpectedHashValues, final int invalidValuesForDelete, final int numOfYesForDelete, final int numOfNoForDelete) throws IOException { List<List<String>> errorRecords = null; List<List<String>> allRecords = null; BufferedReader br = null; BufferedWriter bw = null; CSVParser parser = null; CSVPrinter csvPrinter = null; try { br = new BufferedReader(new FileReader(new File(detectionFile))); bw = new BufferedWriter(new FileWriter(new File(correctionFile))); parser = new CSVParser(br, TestUtils.csvFormat); csvPrinter = new CSVPrinter(bw, TestUtils.csvFormat); List<CSVRecord> detectorRecords = parser.getRecords(); int hashMissingUpdates = 0; int rangeMissingUpdates = 0; int missingGsiExpectedHashValuesCurrent = 0; // Print Header Map<String, Integer> header = parser.getHeaderMap(); csvPrinter.printRecord(header.keySet()); allRecords = new ArrayList<List<String>>(); for (CSVRecord csvRecord : detectorRecords) { List<String> newRecord = new ArrayList<String>(); String tableHashKeyRecorded = csvRecord.get(ViolationRecord.TABLE_HASH_KEY); String hashKeyViolationType = null; if 
(gsiHashKeyName != null) { hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE); } String rangeKeyViolationType = null; if (gsiRangeKeyName != null) { rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE); } for (int i = 0; i < csvRecord.size(); i++) { newRecord.add(i, csvRecord.get(i)); } String newGsiVal = null; if (hashKeyViolationType != null && (hashKeyViolationType.equals("Size Violation") || hashKeyViolationType.equals("Type Violation"))) { if (hashMissingUpdates < missingUpdatesPerKey) { allRecords.add(newRecord); hashMissingUpdates++; continue; } //Remove expected hash Values if (missingGsiExpectedHashValuesCurrent < missingGsiExpectedHashValues) { newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY)); newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY), ""); missingGsiExpectedHashValuesCurrent++; } newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE)); newGsiVal = getNewValue(gsiHashKeyType, 4 /*length*/); newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE), newGsiVal); tableHashToNewGsiHashValueMap.put(tableHashKeyRecorded, newGsiVal); } if (rangeKeyViolationType != null && (rangeKeyViolationType.equals("Size Violation") || rangeKeyViolationType.equals("Type Violation"))) { if (rangeMissingUpdates < missingUpdatesPerKey) { allRecords.add(newRecord); rangeMissingUpdates++; continue; } newRecord.remove(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE)); newGsiVal = getNewValue(gsiRangeKeyType, 4 /*length*/); newRecord.add(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE), newGsiVal); tableHashToNewGsiRangeValueMap.put(tableHashKeyRecorded, newGsiVal); } allRecords.add(newRecord); } // Add 'Y' or 'N' for delete column if (numOfNoForDelete > 0 || numOfYesForDelete > 0 || invalidValuesForDelete > 0) { errorRecords = new ArrayList<List<String>>(); int numOfYesAdded = 0; int numOfNoAdded = 0; int numOfInvalids = 0; for (List<String> record 
: allRecords) { if (numOfInvalids < invalidValuesForDelete) { record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK)); record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "xx"); numOfInvalids++; errorRecords.add(record); continue; } if (numOfYesAdded < numOfYesForDelete) { record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK)); record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "Y"); numOfYesAdded++; continue; } if (numOfNoAdded < numOfNoForDelete) { record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK)); record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "N"); numOfNoAdded++; continue; } } } // Add all records to file csvPrinter.printRecords(allRecords); } finally { br.close(); bw.close(); parser.close(); csvPrinter.close(); } if (errorRecords != null) return errorRecords; else return allRecords; }
From source file:com.kdmanalytics.toif.report.internal.importWizard.TsvImportWizardPage.java
/** * Perform the actual load.//from w w w . j a v a 2s . co m * * @return */ public boolean finish() { // Check source file final String name = editor.getStringValue(); setErrorMessage("Importing " + name + " into " + project + "..."); IPath location = new Path(name); File file = location.toFile(); Reader in = null; CSVParser parser = null; try { in = new FileReader(file); CSVFormat format = CSVFormat.EXCEL.withDelimiter('\t').withIgnoreEmptyLines(); parser = new CSVParser(in, format); System.err.println("FILE: " + name); Map<Integer, String> lookup = new HashMap<Integer, String>(); boolean header = true; for (CSVRecord record : parser) { int size = record.size(); IFile ifile = null; String tool = null; String description = null; int line = 0; int offset = 0; int trust = 0; Boolean status = null; int kdmLine = 0; String cwe = null; String sfp = null; // Read the header first if (header) { System.err.print(" "); for (int i = 0; i < size; i++) { if (i > 0) System.err.print(","); String cell = record.get(i); lookup.put(i, cell); System.err.print(cell); } header = false; System.err.println(); System.err.println(" ------------------------------------------"); } // Otherwise this is a data row else { for (int i = 0; i < size; i++) { String cell = record.get(i); String colName = lookup.get(i); if ("Resource".equals(colName)) { IFileGroup group = new FileGroup(cell); try { IResource resource = MemberUtil.findMembers(project, group); if (resource != null) { ifile = (IFile) resource; } } catch (CoreException e) { e.printStackTrace(); } } else if ("SFP".equals(colName)) { sfp = cell; } else if ("CWE".equals(colName)) { cwe = cell; } // Valid is *old* name for "Citing Status" else if ("Valid".equals(colName)) { if (cell != null && !cell.trim().isEmpty()) { status = Boolean.parseBoolean(cell); } } else if ("Citing Status".equals(colName)) { if (cell != null && !cell.trim().isEmpty()) { status = Boolean.parseBoolean(cell); } } else if ("Trust".equals(colName)) { if (cell != 
null && !cell.trim().isEmpty()) { try { trust = Integer.parseInt(cell); } catch (NumberFormatException e) { } } } else if ("Confidence".equals(colName)) { if (cell != null && !cell.trim().isEmpty()) { try { trust = Integer.parseInt(cell); } catch (NumberFormatException e) { } } } else if ("Line Number".equals(colName)) { if (cell != null && !cell.trim().isEmpty()) { try { line = Integer.parseInt(cell); } catch (NumberFormatException e) { } } } else if ("KDM Line Number".equals(colName)) { if (cell != null && !cell.trim().isEmpty()) { try { kdmLine = Integer.parseInt(cell); } catch (NumberFormatException e) { } } } // "Generator Tool" is *old* name for "SCA Tool" else if ("Generator Tool".equals(colName)) { tool = cell; } else if ("SCA tool".equalsIgnoreCase(colName)) { tool = cell; } else if ("Weakness Description".equals(colName)) { description = cell; } else { System.err.println("WARNING: Unknown column name '" + colName + "'"); } } System.err.print(" "); System.err.print(sfp); System.err.print(","); System.err.print(cwe); System.err.print(","); System.err.print(status); System.err.print(","); System.err.print(trust); System.err.print(","); System.err.print(ifile); System.err.print(","); System.err.print(line); System.err.print(","); System.err.print(kdmLine); System.err.print(","); System.err.print(tool); System.err.print(","); System.err.print(description); System.err.println(); if (ifile != null) { // Create an associated finding. This will allow us to // set the citing status for the finding. If the // finding does not actually exist in the database this information // is still stored in case the finding exists in the future. 
FindingData finding = new FindingData(ifile, tool, description, line, offset, cwe, sfp); if (status != null) { finding.cite(status); } } } } try { IWorkbenchWindow window = PlatformUI.getWorkbench().getActiveWorkbenchWindow(); if (window != null) { IWorkbenchPage page = window.getActivePage(); if (page != null) { FindingView view = (FindingView) page.showView("com.kdmanalytics.toif.views.FindingView"); view.refresh(); } } } catch (PartInitException e) { e.printStackTrace(); } } catch (IOException e) { e.printStackTrace(); } finally { if (parser != null) { try { parser.close(); } catch (IOException e) { e.printStackTrace(); } } if (in != null) { try { in.close(); } catch (IOException e) { e.printStackTrace(); } } } // PlatformUI.getWorkbench().getDisplay().asyncExec(new Runnable() // { // public void run() // { // final ToifReportImportJob job = new ToifReportImportJob("Import SFP/CWE Data", project, // name); // job.setUser(true); // job.setPriority(Job.BUILD); // job.setRule(project); // job.schedule(); // } // }); return true; }
From source file:canreg.client.dataentry.Import.java
public static boolean importFiles(Task<Object, Void> task, Document doc, List<canreg.client.dataentry.Relation> map, File[] files, CanRegServerInterface server, ImportOptions io) throws SQLException, RemoteException, SecurityException, RecordLockedException { int numberOfLinesRead = 0; Writer reportWriter = new BufferedWriter(new OutputStreamWriter(System.out)); if (io.getReportFileName() != null && io.getReportFileName().trim().length() > 0) { try {//from w ww .j a v a 2s.c o m reportWriter = new BufferedWriter(new FileWriter(io.getReportFileName())); } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.WARNING, null, ex); } } boolean success = false; Set<String> noNeedToLookAtPatientVariables = new TreeSet<String>(); noNeedToLookAtPatientVariables .add(canreg.common.Tools.toLowerCaseStandardized(io.getPatientIDVariableName())); noNeedToLookAtPatientVariables .add(canreg.common.Tools.toLowerCaseStandardized(io.getPatientRecordIDVariableName())); String[] lineElements; ResultCode worstResultCodeFound; // CSVReader reader = null; CSVParser parser = null; CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(io.getSeparators()[0]); int linesToRead = io.getMaxLines(); try { // first we get the patients if (task != null) { task.firePropertyChange(PROGRESS, 0, 0); task.firePropertyChange(PATIENTS, 0, 0); } if (files[0] != null) { reportWriter .write("Starting to import patients from " + files[0].getAbsolutePath() + Globals.newline); FileInputStream patientFIS = new FileInputStream(files[0]); InputStreamReader patientISR = new InputStreamReader(patientFIS, io.getFileCharsets()[0]); Logger.getLogger(Import.class.getName()).log(Level.CONFIG, "Name of the character encoding {0}", patientISR.getEncoding()); int numberOfRecordsInFile = canreg.common.Tools.numberOfLinesInFile(files[0].getAbsolutePath()); numberOfLinesRead = 0; if (linesToRead > 0) { linesToRead = Math.min(numberOfRecordsInFile, linesToRead); } else { linesToRead 
= numberOfRecordsInFile; } parser = CSVParser.parse(files[0], io.getFileCharsets()[0], format); for (CSVRecord csvRecord : parser) { // We allow for null tasks... boolean savePatient = true; boolean deletePatient = false; int oldPatientDatabaseRecordID = -1; if (task != null) { task.firePropertyChange(PROGRESS, ((numberOfLinesRead - 1) * 100 / linesToRead) / 3, ((numberOfLinesRead) * 100 / linesToRead) / 3); task.firePropertyChange(PATIENTS, ((numberOfLinesRead - 1) * 100 / linesToRead), ((numberOfLinesRead) * 100 / linesToRead)); } // Build patient part Patient patient = new Patient(); for (int i = 0; i < map.size(); i++) { Relation rel = map.get(i); if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase("patient")) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { patient.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". 
", ex); success = false; } } } else { patient.setVariable(rel.getDatabaseVariableName(), csvRecord.get(rel.getFileColumnNumber())); } } if (task != null) { task.firePropertyChange(RECORD, i - 1 / map.size() * 50, i / map.size() * 50); } } // debugOut(patient.toString()); // debugOut(tumour.toString()); // add patient to the database Object patientID = patient.getVariable(io.getPatientRecordIDVariableName()); Patient oldPatientRecord = null; try { oldPatientRecord = CanRegClientApp.getApplication().getPatientRecord((String) patientID, false); } catch (DistributedTableDescriptionException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RecordLockedException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RemoteException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SecurityException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } if (oldPatientRecord != null) { // deal with discrepancies switch (io.getDiscrepancies()) { case ImportOptions.REJECT: savePatient = false; break; case ImportOptions.UPDATE: String updateReport = updateRecord(oldPatientRecord, patient); if (updateReport.length() > 0) { reportWriter.write(patient.getVariable(io.getTumourIDVariablename()) + Globals.newline + updateReport); } oldPatientDatabaseRecordID = (Integer) oldPatientRecord .getVariable(Globals.PATIENT_TABLE_RECORD_ID_VARIABLE_NAME); patient = oldPatientRecord; savePatient = true; break; case ImportOptions.OVERWRITE: // deleteTumour; oldPatientDatabaseRecordID = (Integer) oldPatientRecord .getVariable(Globals.PATIENT_TABLE_RECORD_ID_VARIABLE_NAME); String overWriteReport = overwriteRecord(oldPatientRecord, patient); if (overWriteReport.length() > 
0) { reportWriter.write(patient.getVariable(io.getTumourIDVariablename()) + Globals.newline + overWriteReport); } patient = oldPatientRecord; savePatient = true; break; } // reportWriter.write(tumour.getVariable(io.getTumourIDVariablename()) + "Tumour already exists.\n"); } if (task != null) { task.firePropertyChange(RECORD, 50, 75); } if ((!io.isTestOnly())) { if (deletePatient) { server.deleteRecord(oldPatientDatabaseRecordID, Globals.PATIENT_TABLE_NAME); } if (savePatient) { if (patient.getVariable(Globals.PATIENT_TABLE_RECORD_ID_VARIABLE_NAME) != null) { server.editPatient(patient); } else { server.savePatient(patient); } } } if (task != null) { task.firePropertyChange(RECORD, 100, 75); } numberOfLinesRead++; if (Thread.interrupted()) { //We've been interrupted: no more importing. reportWriter.flush(); throw new InterruptedException(); } } parser.close(); reportWriter.write("Finished reading patients." + Globals.newline + Globals.newline); reportWriter.flush(); } if (task != null) { task.firePropertyChange(PATIENTS, 100, 100); task.firePropertyChange("progress", 33, 34); } // then we get the tumours if (task != null) { task.firePropertyChange(TUMOURS, 0, 0); } if (files[1] != null) { reportWriter .write("Starting to import tumours from " + files[1].getAbsolutePath() + Globals.newline); FileInputStream tumourFIS = new FileInputStream(files[1]); InputStreamReader tumourISR = new InputStreamReader(tumourFIS, io.getFileCharsets()[1]); Logger.getLogger(Import.class.getName()).log(Level.CONFIG, "Name of the character encoding {0}", tumourISR.getEncoding()); numberOfLinesRead = 0; int numberOfRecordsInFile = canreg.common.Tools.numberOfLinesInFile(files[1].getAbsolutePath()); if (linesToRead > 0) { linesToRead = Math.min(numberOfRecordsInFile, linesToRead); } else { linesToRead = numberOfRecordsInFile; } format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(io.getSeparators()[1]); parser = CSVParser.parse(files[1], io.getFileCharsets()[1], format); for 
(CSVRecord csvRecord : parser) { // We allow for null tasks... boolean saveTumour = true; boolean deleteTumour = false; if (task != null) { task.firePropertyChange(PROGRESS, 33 + ((numberOfLinesRead - 1) * 100 / linesToRead) / 3, 33 + ((numberOfLinesRead) * 100 / linesToRead) / 3); task.firePropertyChange(TUMOURS, ((numberOfLinesRead - 1) * 100 / linesToRead), ((numberOfLinesRead) * 100 / linesToRead)); } // Build tumour part Tumour tumour = new Tumour(); for (int i = 0; i < map.size(); i++) { Relation rel = map.get(i); if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase("tumour")) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { tumour.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". 
", ex); success = false; } } } else { tumour.setVariable(rel.getDatabaseVariableName(), csvRecord.get(rel.getFileColumnNumber())); } } if (task != null) { task.firePropertyChange(RECORD, i - 1 / map.size() * 50, i / map.size() * 50); } } // see if this tumour exists in the database already // TODO: Implement this using arrays and getTumourRexords instead Tumour tumour2 = null; try { tumour2 = CanRegClientApp.getApplication().getTumourRecordBasedOnTumourID( (String) tumour.getVariable(io.getTumourIDVariablename()), false); } catch (DistributedTableDescriptionException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RecordLockedException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RemoteException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SecurityException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } if (tumour2 != null) { // deal with discrepancies switch (io.getDiscrepancies()) { case ImportOptions.REJECT: saveTumour = false; break; case ImportOptions.UPDATE: String updateReport = updateRecord(tumour2, tumour); if (updateReport.length() > 0) { reportWriter.write(tumour.getVariable(io.getTumourIDVariablename()) + Globals.newline + updateReport); } tumour = tumour2; saveTumour = true; break; case ImportOptions.OVERWRITE: // deleteTumour; deleteTumour = true; saveTumour = true; break; } // reportWriter.write(tumour.getVariable(io.getTumourIDVariablename()) + "Tumour already exists.\n"); } Patient patient = null; try { patient = CanRegClientApp.getApplication().getPatientRecord( (String) tumour.getVariable(io.getPatientRecordIDTumourTableVariableName()), false); } catch (DistributedTableDescriptionException ex) { 
Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RecordLockedException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RemoteException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SecurityException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } if (patient != null) { if (io.isDoChecks() && saveTumour) { // run the edits... String message = ""; LinkedList<CheckResult> checkResults = canreg.client.CanRegClientApp.getApplication() .performChecks(patient, tumour); Map<Globals.StandardVariableNames, CheckResult.ResultCode> mapOfVariablesAndWorstResultCodes = new EnumMap<Globals.StandardVariableNames, CheckResult.ResultCode>( Globals.StandardVariableNames.class); worstResultCodeFound = CheckResult.ResultCode.OK; for (CheckResult result : checkResults) { if (result.getResultCode() != CheckResult.ResultCode.OK && result.getResultCode() != CheckResult.ResultCode.NotDone) { if (!result.getResultCode().equals(CheckResult.ResultCode.Missing)) { message += result + "\t"; worstResultCodeFound = CheckResult.decideWorstResultCode( result.getResultCode(), worstResultCodeFound); for (Globals.StandardVariableNames standardVariableName : result .getVariablesInvolved()) { CheckResult.ResultCode worstResultCodeFoundForThisVariable = mapOfVariablesAndWorstResultCodes .get(standardVariableName); if (worstResultCodeFoundForThisVariable == null) { mapOfVariablesAndWorstResultCodes.put(standardVariableName, result.getResultCode()); } else if (CheckResult.compareResultSets(result.getResultCode(), worstResultCodeFoundForThisVariable) > 0) { mapOfVariablesAndWorstResultCodes.put(standardVariableName, result.getResultCode()); } } } } // 
Logger.getLogger(Import.class.getName()).log(Level.INFO, result.toString()); } // always generate ICD10... // ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication().performConversions(Converter.ConversionName.ICDO3toICD10, patient, tumour); // tumour.setVariable(io.getICD10VariableName(), conversionResult[0].getValue()); if (worstResultCodeFound != CheckResult.ResultCode.Invalid && worstResultCodeFound != CheckResult.ResultCode.Missing) { // generate ICD10 codes ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication() .performConversions(Converter.ConversionName.ICDO3toICD10, patient, tumour); tumour.setVariable(io.getICD10VariableName(), conversionResult[0].getValue()); // generate ICCC codes ConversionResult[] conversionResultICCC = canreg.client.CanRegClientApp .getApplication() .performConversions(Converter.ConversionName.ICDO3toICCC3, patient, tumour); tumour.setVariable(io.getICCCVariableName(), conversionResultICCC[0].getValue()); } else { tumour.setVariable(io.getTumourRecordStatus(), "0"); } if (worstResultCodeFound == CheckResult.ResultCode.OK) { // message += "Cross-check conclusion: Valid"; } else { reportWriter.write(tumour.getVariable(io.getTumourIDVariablename()) + "\t" + message + Globals.newline); // System.out.println(tumour.getVariable(io.getTumourIDVariablename()) + " " + message); } tumour.setVariable(io.getTumourCheckStatus(), CheckResult.toDatabaseVariable(worstResultCodeFound)); } else { // try to generate ICD10, if missing, anyway String icd10 = (String) tumour.getVariable(io.getICD10VariableName()); if (icd10 == null || icd10.trim().length() == 0) { ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication() .performConversions(Converter.ConversionName.ICDO3toICD10, patient, tumour); tumour.setVariable(io.getICD10VariableName(), conversionResult[0].getValue()); } // try to generate ICCC3, if missing, anyway String iccc = (String) 
tumour.getVariable(io.getICCCVariableName()); if (iccc == null || iccc.trim().length() == 0) { ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication() .performConversions(Converter.ConversionName.ICDO3toICCC3, patient, tumour); tumour.setVariable(io.getICCCVariableName(), conversionResult[0].getValue()); } } } else { reportWriter.write(tumour.getVariable(io.getTumourIDVariablename()) + "\t" + "No patient matches this Tumour." + Globals.newline); tumour.setVariable(io.getTumourRecordStatus(), "0"); tumour.setVariable(io.getTumourCheckStatus(), CheckResult.toDatabaseVariable(ResultCode.Missing)); } if (task != null) { task.firePropertyChange(RECORD, 50, 75); } if (!io.isTestOnly()) { if (deleteTumour) { server.deleteRecord( (Integer) tumour2.getVariable(Globals.TUMOUR_TABLE_RECORD_ID_VARIABLE_NAME), Globals.TUMOUR_TABLE_NAME); } if (saveTumour) { // if tumour has no record ID we save it if (tumour.getVariable(Globals.TUMOUR_TABLE_RECORD_ID_VARIABLE_NAME) == null) { server.saveTumour(tumour); } // if not we edit it else { server.editTumour(tumour); } } } if (task != null) { task.firePropertyChange(RECORD, 75, 100); } //Read next line of data numberOfLinesRead++; if (Thread.interrupted()) { //We've been interrupted: no more importing. reportWriter.flush(); throw new InterruptedException(); } } parser.close(); reportWriter.write("Finished reading tumours." 
+ Globals.newline + Globals.newline); reportWriter.flush(); } if (task != null) { task.firePropertyChange(TUMOURS, 100, 100); } // then at last we get the sources if (task != null) { task.firePropertyChange(SOURCES, 0, 0); task.firePropertyChange(PROGRESS, 66, 66); } if (files[2] != null) { reportWriter .write("Starting to import sources from " + files[2].getAbsolutePath() + Globals.newline); FileInputStream sourceFIS = new FileInputStream(files[2]); InputStreamReader sourceISR = new InputStreamReader(sourceFIS, io.getFileCharsets()[2]); Logger.getLogger(Import.class.getName()).log(Level.CONFIG, "Name of the character encoding {0}", sourceISR.getEncoding()); numberOfLinesRead = 0; int numberOfRecordsInFile = canreg.common.Tools.numberOfLinesInFile(files[2].getAbsolutePath()); if (linesToRead > 0) { linesToRead = Math.min(numberOfRecordsInFile, linesToRead); } else { linesToRead = numberOfRecordsInFile; } format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(io.getSeparators()[2]); parser = CSVParser.parse(files[2], io.getFileCharsets()[2], format); for (CSVRecord csvRecord : parser) { // We allow for null tasks... 
if (task != null) { task.firePropertyChange(PROGRESS, 67 + ((numberOfLinesRead - 1) * 100 / linesToRead) / 3, 67 + ((numberOfLinesRead) * 100 / linesToRead) / 3); task.firePropertyChange(SOURCES, ((numberOfLinesRead - 1) * 100 / linesToRead), ((numberOfLinesRead) * 100 / linesToRead)); } // Build source part Source source = new Source(); for (int i = 0; i < map.size(); i++) { Relation rel = map.get(i); if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase(Globals.SOURCE_TABLE_NAME)) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { source.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } } } else { source.setVariable(rel.getDatabaseVariableName(), csvRecord.get(rel.getFileColumnNumber())); } } if (task != null) { task.firePropertyChange(RECORD, i - 1 / map.size() * 50, i / map.size() * 50); } } Tumour tumour = null; try { tumour = CanRegClientApp.getApplication().getTumourRecordBasedOnTumourID( (String) source.getVariable(io.getTumourIDSourceTableVariableName()), false); } catch (DistributedTableDescriptionException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RecordLockedException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RemoteException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SecurityException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } if (task != null) { 
task.firePropertyChange(RECORD, 50, 75); } boolean addSource = true; if (tumour != null) { Set<Source> sources = tumour.getSources(); Object sourceRecordID = source.getVariable(io.getSourceIDVariablename()); // look for source in sources for (Source oldSource : sources) { if (oldSource.getVariable(io.getSourceIDVariablename()).equals(sourceRecordID)) { // deal with discrepancies switch (io.getDiscrepancies()) { case ImportOptions.REJECT: addSource = false; break; case ImportOptions.UPDATE: String updateReport = updateRecord(oldSource, source); if (updateReport.length() > 0) { reportWriter.write(tumour.getVariable(io.getTumourIDVariablename()) + Globals.newline + updateReport); } source = oldSource; addSource = false; break; case ImportOptions.OVERWRITE: // deleteTumour; sources.remove(oldSource); addSource = true; break; } } } if (addSource) { sources.add(source); } tumour.setSources(sources); if (!io.isTestOnly()) { server.editTumour(tumour); } } else { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, "No tumour for source record."); } if (task != null) { task.firePropertyChange(RECORD, 75, 100); } //Read next line of data numberOfLinesRead++; if (Thread.interrupted()) { //We've been interrupted: no more importing. reportWriter.flush(); throw new InterruptedException(); } } reportWriter.write("Finished reading sources." + Globals.newline + Globals.newline); reportWriter.flush(); parser.close(); } if (task != null) { task.firePropertyChange(SOURCES, 100, 100); task.firePropertyChange(PROGRESS, 100, 100); while (!task.isProgressPropertyValid()) { // wait untill progress has been updated... } } reportWriter.write("Finished" + Globals.newline); reportWriter.flush(); success = true; } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". 
", ex); success = false; } catch (InterruptedException ex) { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Interupted on line: " + (numberOfLinesRead + 1) + ". ", ex); success = true; } catch (IndexOutOfBoundsException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "String too short error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } finally { if (parser != null) { try { parser.close(); } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } } try { reportWriter.flush(); reportWriter.close(); } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } } if (task != null) { task.firePropertyChange(PROGRESS, 100, 100); task.firePropertyChange("finished", null, null); } return success; }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException { List<DataVariable> variableList = new ArrayList<>(); CSVParser parser = new CSVParser(csvReader, inFormat.withHeader()); Map<String, Integer> headers = parser.getHeaderMap(); int i = 0;/*w w w. j av a 2s . c o m*/ for (String varName : headers.keySet()) { if (varName == null || varName.isEmpty()) { // TODO: // Add a sensible variable name validation algorithm. // -- L.A. 4.0 alpha 1 throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader")); } DataVariable dv = new DataVariable(); dv.setName(varName); dv.setLabel(varName); dv.setInvalidRanges(new ArrayList<>()); dv.setSummaryStatistics(new ArrayList<>()); dv.setUnf("UNF:6:NOTCALCULATED"); dv.setCategories(new ArrayList<>()); variableList.add(dv); dv.setTypeCharacter(); dv.setIntervalDiscrete(); dv.setFileOrder(i); dv.setDataTable(dataTable); i++; } dataTable.setVarQuantity((long) variableList.size()); dataTable.setDataVariables(variableList); boolean[] isNumericVariable = new boolean[headers.size()]; boolean[] isIntegerVariable = new boolean[headers.size()]; boolean[] isTimeVariable = new boolean[headers.size()]; boolean[] isDateVariable = new boolean[headers.size()]; for (i = 0; i < headers.size(); i++) { // OK, let's assume that every variable is numeric; // but we'll go through the file and examine every value; the // moment we find a value that's not a legit numeric one, we'll // assume that it is in fact a String. isNumericVariable[i] = true; isIntegerVariable[i] = true; isDateVariable[i] = true; isTimeVariable[i] = true; } // First, "learning" pass. 
// (we'll save the incoming stream in another temp file:) SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()]; SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()]; File firstPassTempFile = File.createTempFile("firstpass-", ".csv"); try (CSVPrinter csvFilePrinter = new CSVPrinter( // TODO allow other parsers of tabular data to use this parser by changin inFormat new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) { //Write headers csvFilePrinter.printRecord(headers.keySet()); for (CSVRecord record : parser.getRecords()) { // Checks if #records = #columns in header if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); isIntegerVariable[i] = isIntegerVariable[i] && varString != null && (varString.isEmpty() || varString.equals("null") || (firstNumCharSet.contains(varString.charAt(0)) && StringUtils.isNumeric(varString.substring(1)))); if (isNumericVariable[i]) { // If variable might be "numeric" test to see if this value is a parsable number: if (varString != null && !varString.isEmpty()) { boolean isNumeric = false; boolean isInteger = false; if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA") || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf") || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) { continue; } else { try { Double testDoubleValue = new Double(varString); continue; } catch (NumberFormatException ex) { // the token failed to parse as a double // so the column is a string variable. 
} } isNumericVariable[i] = false; } } // If this is not a numeric column, see if it is a date collumn // by parsing the cell as a date or date-time value: if (!isNumericVariable[i]) { Date dateResult = null; if (isTimeVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isTime = false; if (selectedDateTimeFormat[i] != null) { ParsePosition pos = new ParsePosition(0); dateResult = selectedDateTimeFormat[i].parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; } } else { for (SimpleDateFormat format : TIME_FORMATS) { ParsePosition pos = new ParsePosition(0); dateResult = format.parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; selectedDateTimeFormat[i] = format; break; } } } if (!isTime) { isTimeVariable[i] = false; // if the token didn't parse as a time value, // we will still try to parse it as a date, below. // unless this column is NOT a date. } else { // And if it is a time value, we are going to assume it's // NOT a date. isDateVariable[i] = false; } } } if (isDateVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isDate = false; // TODO: // Strictly speaking, we should be doing the same thing // here as with the time formats above; select the // first one that works, then insist that all the // other values in this column match it... but we // only have one, as of now, so it should be ok. // -- L.A. 4.0 beta for (SimpleDateFormat format : DATE_FORMATS) { // Strict parsing - it will throw an // exception if it doesn't parse! 
format.setLenient(false); try { format.parse(varString); isDate = true; selectedDateFormat[i] = format; break; } catch (ParseException ex) { //Do nothing } } isDateVariable[i] = isDate; } } } } csvFilePrinter.printRecord(record); } } dataTable.setCaseQuantity(parser.getRecordNumber()); parser.close(); csvReader.close(); // Re-type the variables that we've determined are numerics: for (i = 0; i < headers.size(); i++) { if (isNumericVariable[i]) { dataTable.getDataVariables().get(i).setTypeNumeric(); if (isIntegerVariable[i]) { dataTable.getDataVariables().get(i).setIntervalDiscrete(); } else { dataTable.getDataVariables().get(i).setIntervalContinuous(); } } else if (isDateVariable[i] && selectedDateFormat[i] != null) { // Dates are still Strings, i.e., they are "character" and "discrete"; // But we add special format values for them: dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("date"); } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) { // Same for time values: dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("time"); } } // Second, final pass. 
try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; for (CSVRecord record : parser) { if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); if (isNumericVariable[i]) { if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) { // Missing value - represented as an empty string in // the final tab file caseRow[i] = ""; } else if (varString.equalsIgnoreCase("NaN")) { // "Not a Number" special value: caseRow[i] = "NaN"; } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) { // Positive infinity: caseRow[i] = "Inf"; } else if (varString.equalsIgnoreCase("-Inf")) { // Negative infinity: caseRow[i] = "-Inf"; } else if (varString.equalsIgnoreCase("null")) { // By request from Gus - "NULL" is recognized as a // numeric zero: caseRow[i] = isIntegerVariable[i] ? "0" : "0.0"; } else { /* No re-formatting is done on any other numeric values. * We'll save them as they were, for archival purposes. * The alternative solution - formatting in sci. notation * is commented-out below. */ caseRow[i] = varString; /* if (isIntegerVariable[i]) { try { Integer testIntegerValue = new Integer(varString); caseRow[i] = testIntegerValue.toString(); } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as an integer in the first pass! 
(?)"); } } else { try { Double testDoubleValue = new Double(varString); if (testDoubleValue.equals(0.0)) { caseRow[i] = "0.0"; } else { // One possible implementation: // // Round our fractional values to 15 digits // (minimum number of digits of precision guaranteed by // type Double) and format the resulting representations // in a IEEE 754-like "scientific notation" - for ex., // 753.24 will be encoded as 7.5324e2 BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext); caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal); // Strip meaningless zeros and extra + signs: caseRow[i] = caseRow[i].replaceFirst("00*e", "e"); caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e"); caseRow[i] = caseRow[i].replaceFirst("e\\+00", ""); caseRow[i] = caseRow[i].replaceFirst("^\\+", ""); } } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)"); } } */ } } else if (isTimeVariable[i] || isDateVariable[i]) { // Time and Dates are stored NOT quoted (don't ask). if (varString != null) { // Dealing with quotes: // remove the leading and trailing quotes, if present: varString = varString.replaceFirst("^\"*", ""); varString = varString.replaceFirst("\"*$", ""); caseRow[i] = varString; } else { caseRow[i] = ""; } } else { // Treat as a String: // Strings are stored in tab files quoted; // Missing values are stored as an empty string // between two tabs (or one tab and the new line); // Empty strings stored as "" (quoted empty string). 
// For the purposes of this CSV ingest reader, we are going // to assume that all the empty strings in the file are // indeed empty strings, and NOT missing values: if (varString != null) { // escape the quotes, newlines, and tabs: varString = varString.replace("\"", "\\\""); varString = varString.replace("\n", "\\n"); varString = varString.replace("\t", "\\t"); // final pair of quotes: varString = "\"" + varString + "\""; caseRow[i] = varString; } else { caseRow[i] = "\"\""; } } } finalOut.println(StringUtils.join(caseRow, "\t")); } } long linecount = parser.getRecordNumber(); finalOut.close(); parser.close(); dbglog.fine("Tmp File: " + firstPassTempFile); // Firstpass file is deleted to prevent tmp from filling up. firstPassTempFile.delete(); if (dataTable.getCaseQuantity().intValue() != linecount) { List<String> args = Arrays .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args)); } return (int) linecount; }
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationDetectionTest.java
/** * Verifies the contents of the violation detection output file */// w w w . ja va 2s. c om private void checkViolationFileOutput(String detectorOpFile, String tableHashKey, String tableRangeKey, String tableHashKeyType, String tableRangeKeyType, boolean violatedValueExists, List<Map<String, AttributeValue>> hashKeySizeViolations, List<Map<String, AttributeValue>> rangeKeySizeViolations, List<Map<String, AttributeValue>> hashKeyTypeViolations, List<Map<String, AttributeValue>> rangeKeyTypeViolations, int violatedSize, String gsiHashKeyName, String gsiHashKeyType, String gsiRangeKeyName, String gsiRangeKeyType) throws IOException { Map<String, String> hashToRangeHashSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToRangeRangeSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToRangeHashTypeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToRangeRangeTypeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiHashSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiRangeSizeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiHashTypeViolationsMap = new HashMap<String, String>(); Map<String, String> hashToGsiRangeTypeViolationsMap = new HashMap<String, String>(); BufferedReader br = null; CSVParser parser = null; try { br = new BufferedReader(new FileReader(new File(detectorOpFile))); parser = new CSVParser(br, TestUtils.csvFormat); List<CSVRecord> csvRecords = parser.getRecords(); for (CSVRecord csvRecord : csvRecords) { String hashKey = csvRecord.get(ViolationRecord.TABLE_HASH_KEY); String rangeKey = csvRecord.get(ViolationRecord.TABLE_RANGE_KEY); String gsiHashKeyValue = null; if (violatedValueExists) { gsiHashKeyValue = csvRecord.get(ViolationRecord.GSI_HASH_KEY); } String hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE); String hashKeyViolationDesc = 
csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_DESC); String gsiRangeKeyValue = null; if (violatedValueExists) { gsiRangeKeyValue = csvRecord.get(ViolationRecord.GSI_RANGE_KEY); } String rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE); String rangeKeyViolationDesc = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_DESC); boolean foundViolation = false; if (hashKeyViolationType.equals("Size Violation")) { foundViolation = true; hashToRangeHashSizeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { hashToGsiHashSizeViolationsMap.put(hashKey, gsiHashKeyValue); } Assert.assertTrue("Gsi hash key size violation description is incorrect", hashKeyViolationDesc.equals("Max Bytes Allowed: " + TestUtils.MAX_HASH_KEY_SIZE + " Found: " + violatedSize)); } else if (hashKeyViolationType.equals("Type Violation")) { foundViolation = true; hashToRangeHashTypeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { hashToGsiHashTypeViolationsMap.put(hashKey, gsiHashKeyValue); } Assert.assertTrue("Gsi hash key type violation description is incorrect", hashKeyViolationDesc.equals("Expected: " + gsiHashKeyType + " Found: " + TestUtils.returnDifferentAttributeType(gsiHashKeyType))); } else { Assert.assertTrue("Hash key violation description exists even when there is no violation type", hashKeyViolationDesc.equals("")); } if (rangeKeyViolationType.equals("Size Violation")) { foundViolation = true; hashToRangeRangeSizeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { hashToGsiRangeSizeViolationsMap.put(hashKey, gsiRangeKeyValue); } Assert.assertTrue("GSI range key size violation description is incorrect", rangeKeyViolationDesc.equals("Max Bytes Allowed: " + TestUtils.MAX_RANGE_KEY_SIZE + " Found: " + violatedSize)); } else if (rangeKeyViolationType.equals("Type Violation")) { foundViolation = true; hashToRangeRangeTypeViolationsMap.put(hashKey, rangeKey); if (violatedValueExists) { 
hashToGsiRangeTypeViolationsMap.put(hashKey, gsiRangeKeyValue); } Assert.assertTrue("Gsi range key type violation description is incorrect", rangeKeyViolationDesc.equals("Expected: " + gsiRangeKeyType + " Found: " + TestUtils.returnDifferentAttributeType(gsiRangeKeyType))); } else { Assert.assertTrue("Range key violation description exists even when there is no violation type", rangeKeyViolationDesc.equals("")); } Assert.assertTrue("No violation found in a row!", foundViolation); } if (hashKeySizeViolations != null) { for (Map<String, AttributeValue> item : hashKeySizeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeHashSizeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String expectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Size violated GSI hash key's table's hash key's range key does not match in the output!", expectedTableRangeKey, hashToRangeHashSizeViolationsMap.get(expectedTableHashKey)); } hashToRangeHashSizeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected size violation on hash key not found!"); } // Check for gsi hash value if (violatedValueExists) { AttributeValue gsiHashAttr = item.get(gsiHashKeyName); String expectedGsiHashValue = AttributeValueConverter .toStringWithAttributeType(gsiHashAttr); Assert.assertEquals("Size violated Gsi hash value mis-match", expectedGsiHashValue, hashToGsiHashSizeViolationsMap.get(expectedTableHashKey)); hashToGsiHashSizeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi hash key size violations", 0, hashToRangeHashSizeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi hash key size violation values", 0, hashToGsiHashSizeViolationsMap.size()); } if (rangeKeySizeViolations != 
null) { for (Map<String, AttributeValue> item : rangeKeySizeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeRangeSizeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String expectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Size violated GSI range key's table's hash key's range key does not match in the output!", expectedTableRangeKey, hashToRangeRangeSizeViolationsMap.get(expectedTableHashKey)); } hashToRangeRangeSizeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected size violation on range key not found!"); } // Check for gsi range value if (violatedValueExists) { AttributeValue gsiRangeAttr = item.get(gsiRangeKeyName); String expectedGsiRangeValue = AttributeValueConverter .toStringWithAttributeType(gsiRangeAttr); Assert.assertEquals("Size violated Gsi range value mis-match", expectedGsiRangeValue, hashToGsiRangeSizeViolationsMap.get(expectedTableHashKey)); hashToGsiRangeSizeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi range key size violations", 0, hashToRangeRangeSizeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi range key size violation values", 0, hashToGsiRangeSizeViolationsMap.size()); } if (hashKeyTypeViolations != null) { for (Map<String, AttributeValue> item : hashKeyTypeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeHashTypeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String exptectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Type violated 
GSI hash key's table's hash key's range key does not match in the output!", exptectedTableRangeKey, hashToRangeHashTypeViolationsMap.get(expectedTableHashKey)); } hashToRangeHashTypeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected type violation on hash key not found!"); } // Check for gsi hash value if (violatedValueExists) { AttributeValue gsiHashAttr = item.get(gsiHashKeyName); String expectedGsiHashValue = AttributeValueConverter .toStringWithAttributeType(gsiHashAttr); Assert.assertEquals("Type violated Gsi hash value mis-match", expectedGsiHashValue, hashToGsiHashTypeViolationsMap.get(expectedTableHashKey)); hashToGsiHashTypeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi hash key type violations", 0, hashToRangeHashTypeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi hash key type violation values", 0, hashToGsiHashTypeViolationsMap.size()); } if (rangeKeyTypeViolations != null) { for (Map<String, AttributeValue> item : rangeKeyTypeViolations) { AttributeValue tableHashAttr = item.get(tableHashKey); String expectedTableHashKey = AttributeValueConverter.toBlankString(tableHashAttr); if (hashToRangeRangeTypeViolationsMap.containsKey(expectedTableHashKey)) { if (tableRangeKey != null) { AttributeValue tableRangeAttr = item.get(tableRangeKey); String exptectedTableRangeKey = AttributeValueConverter.toBlankString(tableRangeAttr); Assert.assertEquals( "Type violated GSI range key's table's hash key's range key does not match in the output!", exptectedTableRangeKey, hashToRangeRangeTypeViolationsMap.get(expectedTableHashKey)); } hashToRangeRangeTypeViolationsMap.remove(expectedTableHashKey); } else { Assert.fail("Expected type violation on range key not found!"); } // Check for gsi range value if (violatedValueExists) { AttributeValue gsiRangeAttr = item.get(gsiRangeKeyName); String expectedGsiRangeValue = AttributeValueConverter 
.toStringWithAttributeType(gsiRangeAttr); Assert.assertEquals("Type violated Gsi range value mis-match", expectedGsiRangeValue, hashToGsiRangeTypeViolationsMap.get(expectedTableHashKey)); hashToGsiRangeTypeViolationsMap.remove(expectedTableHashKey); } } Assert.assertEquals("Extra entries found for gsi range key type violations", 0, hashToRangeRangeTypeViolationsMap.size()); Assert.assertEquals("Extra entries found for gsi range key type violation values", 0, hashToGsiRangeTypeViolationsMap.size()); } } finally { br.close(); parser.close(); } }