List of usage examples for org.apache.commons.csv CSVParser parse
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException
From source file:com.ibm.g11n.pipeline.example.MultiBundleCSVFilter.java
@Override public void merge(InputStream baseStream, OutputStream outStream, Map<String, LanguageBundle> languageBundles, FilterOptions options) throws IOException, ResourceFilterException { // create key-value map for each bundle Map<String, Map<String, String>> kvMaps = new HashMap<String, Map<String, String>>(); for (Entry<String, LanguageBundle> bundleEntry : languageBundles.entrySet()) { LanguageBundle languageBundle = bundleEntry.getValue(); Map<String, String> kvMap = new HashMap<String, String>(); for (ResourceString resString : languageBundle.getResourceStrings()) { kvMap.put(resString.getKey(), resString.getValue()); }/*w w w . j a va 2 s . c o m*/ kvMaps.put(bundleEntry.getKey(), kvMap); } CSVParser parser = CSVParser.parse(baseStream, StandardCharsets.UTF_8, CSVFormat.RFC4180.withHeader("module", "key", "value").withSkipHeaderRecord(true)); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outStream, StandardCharsets.UTF_8)); CSVPrinter printer = CSVFormat.RFC4180.withHeader("module", "key", "value").print(writer); for (CSVRecord record : parser) { String module = record.get(0); String key = record.get(1); String value = record.get(2); Map<String, String> moduleKVMap = kvMaps.get(module); if (moduleKVMap != null) { String trValue = moduleKVMap.get(key); if (trValue != null) { value = trValue; } } printer.printRecord(module, key, value); } printer.flush(); }
From source file:MasterRoomControllerFx.rooms.charts.RoomChartController.java
/**
 * Reads the file {@code "humHistory" + roomRow + roomColumn + ".csv"} (semicolon
 * delimited, UTF-8) and appends every value except the last one of each record
 * to {@code hum} as a float. Does nothing when the file does not exist.
 *
 * <p>The parser is opened in a try-with-resources block so the underlying file
 * handle is released even when parsing fails. I/O errors are logged at SEVERE
 * level and otherwise ignored; a malformed number still propagates as
 * {@link NumberFormatException}, as before.
 */
public void getHumData() {
    File csvData = new File("humHistory" + roomRow + roomColumn + ".csv");
    if (!csvData.exists()) {
        return;
    }
    try (CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8,
            CSVFormat.EXCEL.withDelimiter(';'))) {
        for (CSVRecord csvRecord : parser) {
            // The last column of each record is intentionally not read (size() - 1).
            for (int i = 0; i < csvRecord.size() - 1; i++) {
                hum.add(Float.parseFloat(csvRecord.get(i)));
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(RoomChartController.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:canreg.client.dataentry.Import.java
// Imports patient, tumour and source records from a delimited text file into the
// database behind the given server interface.
//
// The file is parsed with CSVFormat.DEFAULT, first record as header, using
// io.getSeparator() as delimiter and io.getFileCharset() as charset. For every
// CSV record the method:
//   1. reports progress on `task` (only when task is non-null);
//   2. builds a Patient, a Tumour and a Source from the columns that `map` assigns
//      to the "patient", "tumour" and Globals.SOURCE_TABLE_NAME tables; variables
//      typed "Number" go through Integer.parseInt, all others through
//      StringEscapeUtils.unescapeCsv;
//   3. saves the patient first when it carries no patient ID, then re-reads the
//      record to pick up the patient ID and patient record ID;
//   4. when io.isDataFromPreviousCanReg() is true, derives the patient record ID
//      and tumour ID, resolves multiple-primary codes via lookUpPatientID and
//      reuses an existing patient record if the new one adds no information;
//   5. registers each first name with the patient's sex code in nameSexTable
//      (stored as 9 when a name was already registered with a different code);
//   6. saves or edits the patient, fills blank ICD10 / ICCC values through
//      performConversions, and saves the tumour;
//   7. throws InterruptedException if Thread.interrupted() reports an interrupt.
//
// Returns true when the loop completes or is interrupted; false when an
// IOException, NumberFormatException, IndexOutOfBoundsException or SQLException
// reaches the outer catch. The parser is closed in the finally block.
//
// NOTE(review): per-field NumberFormatExceptions set success = false inside the
// loop, but success is overwritten with true after the loop finishes.
// NOTE(review): task is null-checked inside the loop, but the call
// task.firePropertyChange("finished", ...) after the loop is not guarded.
// NOTE(review): oldPatients stays null if getPatientRecordsByID throws; the
// for-each that follows would then fail with a NullPointerException.
// NOTE(review): registeredSexCode != sexCode compares two Integer objects by
// reference, not by value - confirm this is intended.
// NOTE(review): the doc parameter is not referenced in the body.
/** * * @param task/*from w ww . j av a 2 s.c o m*/ * @param doc * @param map * @param file * @param server * @param io * @return * @throws java.sql.SQLException * @throws java.rmi.RemoteException * @throws canreg.server.database.RecordLockedException */ public static boolean importFile(Task<Object, String> task, Document doc, List<canreg.client.dataentry.Relation> map, File file, CanRegServerInterface server, ImportOptions io) throws SQLException, RemoteException, SecurityException, RecordLockedException { //public static boolean importFile(canreg.client.gui.management.CanReg4MigrationInternalFrame.MigrationTask task, Document doc, List<canreg.client.dataentry.Relation> map, File file, CanRegServerInterface server, ImportOptions io) throws SQLException, RemoteException, SecurityException, RecordLockedException { boolean success = false; Set<String> noNeedToLookAtPatientVariables = new TreeSet<String>(); noNeedToLookAtPatientVariables.add(io.getPatientIDVariableName()); noNeedToLookAtPatientVariables.add(io.getPatientRecordIDVariableName()); String firstNameVariableName = io.getFirstNameVariableName(); String sexVariableName = io.getSexVariableName(); CSVParser parser = null; CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(io.getSeparator()); int linesToRead = io.getMaxLines(); HashMap mpCodes = new HashMap(); int numberOfLinesRead = 0; Map<String, Integer> nameSexTable = server.getNameSexTables(); try { // FileInputStream fis = new FileInputStream(file); // BufferedReader bsr = new BufferedReader(new InputStreamReader(fis, io.getFileCharset())); // Logger.getLogger(Import.class.getName()).log(Level.CONFIG, "Name of the character encoding {0}"); int numberOfRecordsInFile = canreg.common.Tools.numberOfLinesInFile(file.getAbsolutePath()); if (linesToRead > 0) { linesToRead = Math.min(numberOfRecordsInFile, linesToRead); } else { linesToRead = numberOfRecordsInFile; } parser = CSVParser.parse(file, io.getFileCharset(), format); for 
(CSVRecord csvRecord : parser) { numberOfLinesRead++; // We allow for null tasks... boolean needToSavePatientAgain = true; int patientDatabaseRecordID = -1; if (task != null) { task.firePropertyChange("progress", (numberOfLinesRead - 1) * 100 / linesToRead, (numberOfLinesRead) * 100 / linesToRead); } // Build patient part Patient patient = new Patient(); for (int i = 0; i < map.size(); i++) { Relation rel = map.get(i); if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase("patient")) { if (rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { patient.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } } } else { patient.setVariable(rel.getDatabaseVariableName(), StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber()))); } } else { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Something wrong with patient part of line " + numberOfLinesRead + ".", new Exception("Error in line: " + numberOfLinesRead + ". 
Can't find field: " + rel.getDatabaseVariableName())); } } } // debugOut(patient.toString()); // Build tumour part Tumour tumour = new Tumour(); for (canreg.client.dataentry.Relation rel : map) { if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase("tumour")) { if (rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { tumour.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } } } else { tumour.setVariable(rel.getDatabaseVariableName(), StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber()))); } } else { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Something wrong with tumour part of line " + numberOfLinesRead + ".", new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: " + rel.getDatabaseVariableName())); } } } // Build source part Set<Source> sources = Collections.synchronizedSet(new LinkedHashSet<Source>()); Source source = new Source(); for (canreg.client.dataentry.Relation rel : map) { if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase(Globals.SOURCE_TABLE_NAME)) { if (rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { source.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". 
", ex); success = false; } } } else { source.setVariable(rel.getDatabaseVariableName(), StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber()))); } } else { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Something wrong with source part of line " + numberOfLinesRead + ".", new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: " + rel.getDatabaseVariableName())); } } } sources.add(source); tumour.setSources(sources); // debugOut(tumour.toString()); // add patient to the database Object patientID = patient.getVariable(io.getPatientIDVariableName()); Object patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName()); if (patientID == null) { // save the record to get the new patientID; patientDatabaseRecordID = server.savePatient(patient); patient = (Patient) server.getRecord(patientDatabaseRecordID, Globals.PATIENT_TABLE_NAME, false); patientID = patient.getVariable(io.getPatientIDVariableName()); patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName()); } if (io.isDataFromPreviousCanReg()) { // set update date for the patient the same as for the tumour Object updateDate = tumour.getVariable(io.getTumourUpdateDateVariableName()); patient.setVariable(io.getPatientUpdateDateVariableName(), updateDate); // Set the patientID the same as the tumourID initially // Object tumourSequence = tumour.getVariable(io.getTumourSequenceVariableName()); Object tumourSequence = "1"; String tumourSequenceString = tumourSequence + ""; while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) { tumourSequenceString = "0" + tumourSequenceString; } patientRecordID = patientID + "" + tumourSequenceString; // If this is a multiple primary tumour... 
String mpCodeString = (String) tumour.getVariable(io.getMultiplePrimaryVariableName()); if (mpCodeString != null && mpCodeString.length() > 0) { patientID = lookUpPatientID(mpCodeString, patientID, mpCodes); // rebuild sequenceNumber Tumour[] tumours = new Tumour[0]; try { tumours = CanRegClientApp.getApplication() .getTumourRecordsBasedOnPatientID(patientID + "", false); } catch (DistributedTableDescriptionException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } tumourSequenceString = (tumours.length + 1) + ""; while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) { tumourSequenceString = "0" + tumourSequenceString; } patientRecordID = patientID + "" + tumourSequenceString; Patient[] oldPatients = null; try { oldPatients = CanRegClientApp.getApplication().getPatientRecordsByID((String) patientID, false); } catch (RemoteException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SecurityException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (DistributedTableDescriptionException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RecordLockedException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } for (Patient oldPatient : oldPatients) { if (!Tools.newRecordContainsNewInfo(patient, oldPatient, noNeedToLookAtPatientVariables)) { needToSavePatientAgain = false; patient = oldPatient; patientRecordID = oldPatient.getVariable(io.getPatientRecordIDVariableName()); } } } Object tumourID = patientRecordID + "" + tumourSequenceString; // 
patient.setVariable(io.getPatientIDVariableName(), patientID); tumour.setVariable(io.getTumourIDVariablename(), tumourID); // And store the record ID patient.setVariable(io.getPatientRecordIDVariableName(), patientRecordID); // Set the patient ID number on the tumour tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID); tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID); // Set the deprecated flag to 0 - no obsolete records from CR4 tumour.setVariable(io.getObsoleteTumourFlagVariableName(), "0"); patient.setVariable(io.getObsoletePatientFlagVariableName(), "0"); } // Set the name in the firstName database String sex = (String) patient.getVariable(sexVariableName); if (sex != null && sex.length() > 0) { Integer sexCode = Integer.parseInt(sex); String firstNames = (String) patient.getVariable(firstNameVariableName); if (firstNames != null) { String[] firstNamesArray = firstNames.split(" "); for (String firstName : firstNamesArray) { if (firstName != null && firstName.trim().length() > 0) { // here we use the locale specific toUpperCase Integer registeredSexCode = nameSexTable.get(firstName); if (registeredSexCode == null) { NameSexRecord nsr = new NameSexRecord(); nsr.setName(firstName); nsr.setSex(sexCode); server.saveNameSexRecord(nsr, false); nameSexTable.put(firstName, sexCode); } else if (registeredSexCode != sexCode) { if (registeredSexCode != 9) { sexCode = 9; NameSexRecord nsr = new NameSexRecord(); nsr.setName(firstName); nsr.setSex(sexCode); server.saveNameSexRecord(nsr, true); nameSexTable.remove(firstName); nameSexTable.put(firstName, sexCode); } } } } } } if (needToSavePatientAgain) { if (patientDatabaseRecordID > 0) { server.editPatient(patient); } else { patientDatabaseRecordID = server.savePatient(patient); } } if (patient != null && tumour != null) { String icd10 = (String) tumour.getVariable(io.getICD10VariableName()); if (icd10 == null || icd10.trim().length() == 0) { ConversionResult[] 
conversionResult = canreg.client.CanRegClientApp.getApplication() .performConversions(Converter.ConversionName.ICDO3toICD10, patient, tumour); tumour.setVariable(io.getICD10VariableName(), conversionResult[0].getValue()); } String iccc = (String) tumour.getVariable(io.getICCCVariableName()); if (iccc == null || iccc.trim().length() == 0) { ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication() .performConversions(Converter.ConversionName.ICDO3toICCC3, patient, tumour); tumour.setVariable(io.getICCCVariableName(), conversionResult[0].getValue()); } } if (tumour.getVariable(io.getPatientIDTumourTableVariableName()) == null) { tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID); } if (tumour.getVariable(io.getPatientRecordIDTumourTableVariableName()) == null) { tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID); } int tumourDatabaseIDNumber = server.saveTumour(tumour); if (Thread.interrupted()) { //We've been interrupted: no more importing. throw new InterruptedException(); } } task.firePropertyChange("finished", null, null); success = true; } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } catch (InterruptedException ex) { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Interupted on line: " + (numberOfLinesRead + 1) + ". ", ex); success = true; } catch (IndexOutOfBoundsException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". 
", ex); success = false; } finally { if (parser != null) { try { parser.close(); } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } } } return success; }
From source file:io.mindmaps.migration.csv.CSVDataMigratorTest.java
/**
 * Opens a UTF-8 CSV parser (header taken from the first record) on a classpath
 * resource.
 *
 * <p>The resource URL returned by the class loader is handed to
 * {@code CSVParser.parse(URL, Charset, CSVFormat)} directly. Converting it to a
 * {@code File} path and back, as was done before, breaks for paths containing
 * percent-encoded characters (e.g. spaces) and for resources that are not plain
 * files.
 *
 * @param fileName resource name, resolved through the class loader of
 *                 {@code CSVSchemaMigratorTest}
 * @return the parser, or {@code null} if the resource could not be opened
 *         (the I/O error is printed to standard error)
 * @throws NullPointerException if no resource with that name exists
 */
private CSVParser parser(String fileName) {
    java.net.URL resource = CSVSchemaMigratorTest.class.getClassLoader().getResource(fileName);
    if (resource == null) {
        // Same exception type as before, but with a message that names the culprit.
        throw new NullPointerException("Test resource not found on classpath: " + fileName);
    }
    try {
        return CSVParser.parse(resource, StandardCharsets.UTF_8, CSVFormat.DEFAULT.withHeader());
    } catch (IOException e) {
        // Kept as best-effort: callers of this test helper receive null on I/O failure.
        e.printStackTrace();
        return null;
    }
}
From source file:edu.isi.misd.scanner.network.modules.worker.processors.ptr.PrepToResearchProcessor.java
private PrepToResearchResponse analyzeFile(PrepToResearchRequest request, File analysisFile) throws Exception { PrepToResearchResponse response = new PrepToResearchResponse(); Integer requestedOmopConceptID = request.getOmopConceptID(); CSVFormat csvFormat = CSVFormat.newFormat(',').withHeader().withCommentMarker('#').withQuote('"'); CSVParser parser = CSVParser.parse(analysisFile, Charset.defaultCharset(), csvFormat); for (CSVRecord csvRecord : parser) { try {/*from w ww . j a v a 2 s . c om*/ this.validateCSVRecord(csvRecord); // check the ID first, if no match continue Integer omopConceptID = Integer .parseInt(csvRecord.get(ExpectedColumnName.OMOP_CONCEPT_ID.toString())); if (!requestedOmopConceptID.equals(omopConceptID)) { continue; } // match found, create response output record if (log.isDebugEnabled()) { log.debug(String.format("Found a match for requested ID %s, record: %s", requestedOmopConceptID, csvRecord.toString())); } PrepToResearchRecord ptrRecord = new PrepToResearchRecord(); ptrRecord.setOmopConceptID(omopConceptID); ptrRecord.setOmopConceptName(csvRecord.get(ExpectedColumnName.OMOP_CONCEPT_NAME)); ptrRecord.setCategory(csvRecord.get(ExpectedColumnName.CATEGORY)); ptrRecord.setCategoryValue(csvRecord.get(ExpectedColumnName.CATEGORY_VALUE)); ptrRecord.setCountFemales(Integer.parseInt(csvRecord.get(ExpectedColumnName.COUNT_FEMALES))); ptrRecord.setCountMales(Integer.parseInt(csvRecord.get(ExpectedColumnName.COUNT_MALES))); ptrRecord.setCountTotal(Integer.parseInt(csvRecord.get(ExpectedColumnName.COUNT_TOTAL))); response.getPrepToResearchRecord().add(ptrRecord); } catch (Exception e) { String error = String.format( "An exception occured while processing row number %s with the following values %s: %s", csvRecord.getRecordNumber(), csvRecord.toString(), e.toString()); parser.close(); throw new RuntimeException(error); } } parser.close(); return response; }
From source file:com.me.jvmi.Main.java
/**
 * Parses the given CSV file (UTF-8, header taken from the first record) into
 * {@code InputRecord}s and links every package record to the records named by
 * its {@code packageIds}.
 *
 * <p>Records are keyed by {@code getId()}; a later record with the same id
 * replaces an earlier one. The parser is closed as soon as all rows are read,
 * so the file handle is not leaked.
 *
 * @param csv path of the CSV file
 * @return the parsed records (a view of the values of the internal id map)
 * @throws IOException           if the file cannot be read
 * @throws IllegalStateException if a package refers to an id that is not in the file
 */
public static Collection<InputRecord> parseInput(Path csv) throws IOException {
    Map<String, InputRecord> records = new HashMap<>();
    try (CSVParser parser = CSVParser.parse(csv.toFile(), Charset.forName("UTF-8"),
            CSVFormat.DEFAULT.withHeader())) {
        for (CSVRecord record : parser) {
            InputRecord input = new InputRecord(record);
            records.put(input.getId(), input);
        }
    }

    // Second pass: every id is known now, so package members can be resolved.
    for (InputRecord record : records.values()) {
        if (record.isPackage()) {
            for (String id : record.packageIds) {
                if (!records.containsKey(id)) {
                    throw new IllegalStateException("Could not find product for package id: " + id);
                }
                record.addItem(records.get(id));
            }
        }
    }
    return records.values();
}
From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.Test.java
private static ArrayList<HashSet<String>> extractEntities(File csvData, int nOfAttributes) throws IOException { CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.RFC4180); int r = 0;//ww w .j a v a 2s. co m ArrayList<Integer> attributePositions = new ArrayList<>(); ArrayList<String> attributeNames = new ArrayList<>(); ArrayList<HashSet<String>> res = new ArrayList<>(); for (CSVRecord csvRecord : parser) { if (r == 0) { Iterator<String> it = csvRecord.iterator(); it.next(); //skip URI if (!it.hasNext()) { //it is an empty file return res; } it.next(); //skip rdf-schema#label it.next(); //skip rdf-schema#comment int c = 2; for (; it.hasNext();) { c++; String attr = it.next(); if (!attr.endsWith("_label")) { attributePositions.add(c); } } } else if (r == 1) { Iterator<String> it = csvRecord.iterator(); it.next(); //skip uri it.next(); //skip rdf-schema#label it.next(); //skip rdf-schema#comment int c = 2; int i = 0; while (i < attributePositions.size()) { c++; String attr = it.next(); if (attributePositions.get(i) == c) { if (!stopAttributes.contains(attr)) { attributes.add(attr); } attributeNames.add(attr); i++; } } } else if (r > 3) { ArrayList<String> attributesOfThisEntity = new ArrayList<>(); Iterator<String> it = csvRecord.iterator(); String uri = it.next(); it.next(); //skip rdf-schema#label it.next(); //skip rdf-schema#comment int c = 2; int i = 0; while (i < attributePositions.size()) { c++; String val = it.next(); if (attributePositions.get(i) == c) { if (!val.equalsIgnoreCase("null")) { String attribute = attributeNames.get(i); if (!stopAttributes.contains(attribute)) { attributesOfThisEntity.add(attribute); } } i++; } } Collections.shuffle(attributesOfThisEntity); HashSet<String> s = new HashSet<>(); for (int k = 0; k < Math.min(nOfAttributes, attributesOfThisEntity.size()); k++) { s.add(attributesOfThisEntity.get(k)); } res.add(s); } r++; } return res; }
From source file:com.hurence.logisland.service.cache.CSVKeyValueCacheService.java
@Override // @OnEnabled// w w w . jav a 2 s . co m public void init(ControllerServiceInitializationContext context) throws InitializationException { super.init(context); try { if (context.getPropertyValue(DATABASE_FILE_URI).isSet()) { dbUri = context.getPropertyValue(DATABASE_FILE_URI).asString(); } if (context.getPropertyValue(DATABASE_FILE_PATH).isSet()) { dbPath = context.getPropertyValue(DATABASE_FILE_PATH).asString(); } if ((dbUri == null) && (dbPath == null)) { throw new Exception( "You must declare " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName()); } InputStream is = null; if (dbUri != null) { logger.info("opening csv database from hdfs : " + dbUri); is = initFromUri(dbUri); } if (dbPath != null) { logger.info("opening csv database from local fs : " + dbPath); is = initFromPath(context, dbPath); } if (is == null) { throw new InitializationException("Something went wrong while initializing csv db from " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName()); } // final Reader reader = new InputStreamReader(is); CSVFormat format = CSVFormat.DEFAULT; if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL.getValue())) { format = CSVFormat.EXCEL; } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL_FR.getValue())) { format = CSVFormat.EXCEL.withDelimiter(';'); } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_MYSQL.getValue())) { format = CSVFormat.MYSQL; } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_RFC4180.getValue())) { format = CSVFormat.RFC4180; } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_TDF.getValue())) { format = CSVFormat.TDF; } if (context.getPropertyValue(CSV_HEADER).isSet()) { String[] columnNames = context.getPropertyValue(CSV_HEADER).asString().split(","); for (String name : columnNames) { headers.get().put(name, "string"); } format = format.withHeader(columnNames); } else if 
(context.getPropertyValue(FIRST_LINE_HEADER).isSet()) { format = format.withFirstRecordAsHeader(); } else { throw new InitializationException("unable to get headers from somewhere"); } Charset charset = Charset.forName("UTF-8"); if (context.getPropertyValue(ENCODING_CHARSET).isSet()) { String encoding = context.getPropertyValue(ENCODING_CHARSET).asString(); charset = Charset.forName(encoding); } rowKey = context.getPropertyValue(ROW_KEY).asString(); CSVParser parser = CSVParser.parse(is, charset, format); //new CSVParser(reader, format); /* * CSVParser parser = null; if (context.getPropertyValue(ENCODING_CHARSET).isSet()) { String encoding = context.getPropertyValue(ENCODING_CHARSET).asString(); parser = CSVParser.parse(reader, Charset.forName(encoding), format); } else { parser = CSVParser.parse(reader, format); } */ long count = 0; try { final Set<String> columnNames = parser.getHeaderMap().keySet(); for (final CSVRecord record : parser) { Record logislandRecord = new StandardRecord(); for (final String column : columnNames) { logislandRecord.setStringField(column, record.get(column)); } set(logislandRecord.getField(rowKey).asString(), logislandRecord); count++; } } finally { logger.info("successfully loaded " + count + " records from CSV file"); parser.close(); is.close(); } } catch (Exception e) { getLogger().error("Could not load database file: {}", new Object[] { e.getMessage() }); throw new InitializationException(e); } }
From source file:canreg.client.dataentry.Convert.java
// Migration variant of the import: reads patient, tumour and source records from
// a delimited text file and stores them through the given server interface.
//
// The file is parsed with CSVFormat.DEFAULT, first record as header, using
// io.getSeparator() as delimiter and io.getFileCharset() as charset. The patient
// ID and patient record ID variable names are passed through
// Tools.toLowerCaseStandardized before being put into the set of variables that
// Tools.newRecordContainsNewInfo is told to skip. linesToRead falls back to the
// number of lines in the file when io.getMaxLines() is -1 or larger than that.
//
// For every CSV record the method:
//   1. when task is non-null, fires "paused" property changes according to
//      CanReg4MigrationInternalFrame.isPaused and, when not paused, "progress";
//   2. builds a Patient, a Tumour and a Source from the columns that `map` assigns
//      to the "patient", "tumour" and Globals.SOURCE_TABLE_NAME tables; variables
//      typed "Number" go through Integer.parseInt, all others through
//      StringEscapeUtils.unescapeCsv;
//   3. saves the patient first when it carries no patient ID, then re-reads the
//      record to pick up the patient ID and patient record ID;
//   4. when io.isDataFromPreviousCanReg() is true, derives the patient record ID
//      and tumour ID, resolves multiple-primary codes via lookUpPatientID and
//      reuses an existing patient record if the new one adds no information;
//   5. registers each first name with the patient's sex code in nameSexTable
//      (stored as 9 when a name was already registered with a different code);
//   6. saves or edits the patient, fills a blank ICD10 value through
//      performConversions, and saves the tumour;
//   7. throws InterruptedException if Thread.interrupted() reports an interrupt.
//
// Returns true when the loop completes or is interrupted; false when an
// IOException, NumberFormatException, IndexOutOfBoundsException or SQLException
// reaches the outer catch. The parser is closed in the finally block.
//
// NOTE(review): in the tumour and source loops the column-range check is part of
// the outer condition as well, so the inner "Something wrong with ..." else
// branch cannot be reached.
// NOTE(review): per-field NumberFormatExceptions set success = false inside the
// loop, but success is overwritten with true after the loop finishes.
// NOTE(review): task is null-checked inside the loop, but the call
// task.firePropertyChange("finished", ...) after the loop is not guarded.
// NOTE(review): oldPatients stays null if getPatientRecordsByID throws; the
// for-each that follows would then fail with a NullPointerException.
// NOTE(review): registeredSexCode != sexCode compares two Integer objects by
// reference, not by value - confirm this is intended.
// NOTE(review): the doc parameter is not referenced in the body.
public static boolean importFile(canreg.client.gui.management.CanReg4MigrationInternalFrame.MigrationTask task, Document doc, List<canreg.client.dataentry.Relation> map, File file, CanRegServerInterface server, ImportOptions io) throws SQLException, RemoteException, SecurityException, RecordLockedException { boolean success = false; Set<String> noNeedToLookAtPatientVariables = new TreeSet<String>(); noNeedToLookAtPatientVariables/*from ww w. j a v a 2s . co m*/ .add(canreg.common.Tools.toLowerCaseStandardized(io.getPatientIDVariableName())); noNeedToLookAtPatientVariables .add(canreg.common.Tools.toLowerCaseStandardized(io.getPatientRecordIDVariableName())); String firstNameVariableName = io.getFirstNameVariableName(); String sexVariableName = io.getSexVariableName(); CSVParser parser = null; HashMap mpCodes = new HashMap(); int numberOfLinesRead = 0; Map<String, Integer> nameSexTable = server.getNameSexTables(); try { // Logger.getLogger(Import.class.getName()).log(Level.CONFIG, "Name of the character encoding {0}"); int numberOfRecordsInFile = canreg.common.Tools.numberOfLinesInFile(file.getAbsolutePath()); debugOut("Importing data from " + file); CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(io.getSeparator()); parser = CSVParser.parse(file, io.getFileCharset(), format); int linesToRead = io.getMaxLines(); if (linesToRead == -1 || linesToRead > numberOfRecordsInFile) { linesToRead = numberOfRecordsInFile; } for (CSVRecord csvRecord : parser) { numberOfLinesRead++; // We allow for null tasks... 
boolean needToSavePatientAgain = true; int patientDatabaseRecordID = -1; if (task != null) { if (canreg.client.gui.management.CanReg4MigrationInternalFrame.isPaused) { task.firePropertyChange("paused", false, true); } if (!canreg.client.gui.management.CanReg4MigrationInternalFrame.isPaused) { task.firePropertyChange("paused", true, false); task.firePropertyChange("progress", (numberOfLinesRead - 1) * 100 / linesToRead, (numberOfLinesRead) * 100 / linesToRead); } } // Build patient part Patient patient = new Patient(); for (int i = 0; i < map.size(); i++) { Relation rel = map.get(i); if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase("patient")) { if (rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { patient.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } } } else { patient.setVariable(rel.getDatabaseVariableName(), StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber()))); } } else { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Something wrong with patient part of line " + numberOfLinesRead + ".", new Exception("Error in line: " + numberOfLinesRead + ". 
Can't find field: " + rel.getDatabaseVariableName())); } } } // debugOut(patient.toString()); // Build tumour part Tumour tumour = new Tumour(); for (int i = 0; i < map.size(); i++) { Relation rel = map.get(i); if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase("tumour") && rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { tumour.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } } } else { tumour.setVariable(rel.getDatabaseVariableName(), StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber()))); } } else { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Something wrong with tumour part of line " + numberOfLinesRead + ".", new Exception("Error in line: " + numberOfLinesRead + ". 
Can't find field: " + rel.getDatabaseVariableName())); } } } // Build source part Set<Source> sources = Collections.synchronizedSet(new LinkedHashSet<Source>()); Source source = new Source(); for (int i = 0; i < map.size(); i++) { Relation rel = map.get(i); if (rel.getDatabaseTableVariableID() >= 0 && rel.getDatabaseTableName().equalsIgnoreCase(Globals.SOURCE_TABLE_NAME) && rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getFileColumnNumber() < csvRecord.size()) { if (rel.getVariableType().equalsIgnoreCase("Number")) { if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) { try { source.setVariable(rel.getDatabaseVariableName(), Integer.parseInt(csvRecord.get(rel.getFileColumnNumber()))); } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Number format error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } } } else { source.setVariable(rel.getDatabaseVariableName(), StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber()))); } } else { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Something wrong with source part of line " + numberOfLinesRead + ".", new Exception("Error in line: " + numberOfLinesRead + ". 
Can't find field: " + rel.getDatabaseVariableName())); } } } sources.add(source); tumour.setSources(sources); // debugOut(tumour.toString()); // add patient to the database Object patientID = patient.getVariable(io.getPatientIDVariableName()); Object patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName()); if (patientID == null) { // save the record to get the new patientID; patientDatabaseRecordID = server.savePatient(patient); patient = (Patient) server.getRecord(patientDatabaseRecordID, Globals.PATIENT_TABLE_NAME, false); patientID = patient.getVariable(io.getPatientIDVariableName()); patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName()); } if (io.isDataFromPreviousCanReg()) { // set update date for the patient the same as for the tumour Object updateDate = tumour.getVariable(io.getTumourUpdateDateVariableName()); patient.setVariable(io.getPatientUpdateDateVariableName(), updateDate); // Set the patientID the same as the tumourID initially // Object tumourSequence = tumour.getVariable(io.getTumourSequenceVariableName()); Object tumourSequence = "1"; String tumourSequenceString = tumourSequence + ""; while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) { tumourSequenceString = "0" + tumourSequenceString; } patientRecordID = patientID + "" + tumourSequenceString; // If this is a multiple primary tumour... 
String mpCodeString = (String) tumour.getVariable(io.getMultiplePrimaryVariableName()); if (mpCodeString != null && mpCodeString.length() > 0) { patientID = lookUpPatientID(mpCodeString, patientID, mpCodes); // rebuild sequenceNumber Tumour[] tumours = new Tumour[0]; try { tumours = CanRegClientApp.getApplication() .getTumourRecordsBasedOnPatientID(patientID + "", false); } catch (DistributedTableDescriptionException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } tumourSequenceString = (tumours.length + 1) + ""; while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) { tumourSequenceString = "0" + tumourSequenceString; } patientRecordID = patientID + "" + tumourSequenceString; Patient[] oldPatients = null; try { oldPatients = CanRegClientApp.getApplication().getPatientRecordsByID((String) patientID, false); } catch (RemoteException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SecurityException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (DistributedTableDescriptionException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (RecordLockedException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } catch (UnknownTableException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } for (Patient oldPatient : oldPatients) { if (!Tools.newRecordContainsNewInfo(patient, oldPatient, noNeedToLookAtPatientVariables)) { needToSavePatientAgain = false; patient = oldPatient; patientRecordID = oldPatient.getVariable(io.getPatientRecordIDVariableName()); } } } Object tumourID = patientRecordID + "" + tumourSequenceString; // 
patient.setVariable(io.getPatientIDVariableName(), patientID); tumour.setVariable(io.getTumourIDVariablename(), tumourID); // And store the record ID patient.setVariable(io.getPatientRecordIDVariableName(), patientRecordID); // Set the patient ID number on the tumour tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID); tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID); // Set the deprecated flag to 0 - no obsolete records from CR4 tumour.setVariable(io.getObsoleteTumourFlagVariableName(), "0"); patient.setVariable(io.getObsoletePatientFlagVariableName(), "0"); } // Set the name in the firstName database String sex = (String) patient.getVariable(sexVariableName); if (sex != null && sex.length() > 0) { Integer sexCode = Integer.parseInt(sex); String firstNames = (String) patient.getVariable(firstNameVariableName); if (firstNames != null) { String[] firstNamesArray = firstNames.split(" "); for (String firstName : firstNamesArray) { if (firstName != null && firstName.trim().length() > 0) { // here we use the locale specific toUpperCase Integer registeredSexCode = nameSexTable.get(firstName); if (registeredSexCode == null) { NameSexRecord nsr = new NameSexRecord(); nsr.setName(firstName); nsr.setSex(sexCode); server.saveNameSexRecord(nsr, false); nameSexTable.put(firstName, sexCode); } else if (registeredSexCode != sexCode) { if (registeredSexCode != 9) { sexCode = 9; NameSexRecord nsr = new NameSexRecord(); nsr.setName(firstName); nsr.setSex(sexCode); server.saveNameSexRecord(nsr, true); nameSexTable.remove(firstName); nameSexTable.put(firstName, sexCode); } } } } } } if (needToSavePatientAgain) { if (patientDatabaseRecordID > 0) { server.editPatient(patient); } else { patientDatabaseRecordID = server.savePatient(patient); } } if (patient != null && tumour != null) { String icd10 = (String) tumour.getVariable(io.getICD10VariableName()); if (icd10 == null || icd10.trim().length() == 0) { ConversionResult[] 
conversionResult = canreg.client.CanRegClientApp.getApplication() .performConversions(Converter.ConversionName.ICDO3toICD10, patient, tumour); tumour.setVariable(io.getICD10VariableName(), conversionResult[0].getValue()); } } if (tumour.getVariable(io.getPatientIDTumourTableVariableName()) == null) { tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID); } if (tumour.getVariable(io.getPatientRecordIDTumourTableVariableName()) == null) { tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID); } int tumourDatabaseIDNumber = server.saveTumour(tumour); if (Thread.interrupted()) { //We've been interrupted: no more importing. throw new InterruptedException(); } } task.firePropertyChange("finished", null, null); success = true; } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } catch (NumberFormatException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } catch (InterruptedException ex) { Logger.getLogger(Import.class.getName()).log(Level.INFO, "Interupted on line: " + (numberOfLinesRead + 1) + ". ", ex); success = true; } catch (IndexOutOfBoundsException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } catch (SQLException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex); success = false; } finally { if (parser != null) { try { parser.close(); } catch (IOException ex) { Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex); } } } return success; }
From source file:com.ibm.watson.developer_cloud.professor_languo.model.stack_exchange.CorpusBuilderTest.java
private void deserialiezd_duplicate_threads_should_match_original_duplicate_threads() throws IngestionException { String csvFilePath = dupCorpusBuilder.getDupThreadDirPath() + StackExchangeConstants.DUP_THREAD_TSV_FILE_NAME + StackExchangeConstants.DUP_THREAD_TSV_FILE_EXTENSION; File csvData = new File(csvFilePath); CSVParser parser;/* ww w .j a va 2 s . co m*/ List<CSVRecord> records; try { parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.TDF.withHeader()); records = parser.getRecords(); } catch (IOException e) { throw new IngestionException(e); } Set<StackExchangeThread> dupThreadSet = dupCorpusBuilder.getDupThreadSetFromBinFiles(); for (StackExchangeThread thread : dupThreadSet) { String binfileName = dupCorpusBuilder.getDupThreadDirPath() + thread.getId() + StackExchangeConstants.BIN_FILE_SUFFIX; CSVRecord matchRecord = null; for (CSVRecord record : records) if (Integer.parseInt(record.get(0)) == thread.getId()) { matchRecord = record; break; } assertTrue(matchRecord != null); // TODO haven't check the originId yet since it requires the new // method to get origin id from String deserTitle = matchRecord.get(1), deserBody = matchRecord.get(2), deserFileName = matchRecord.get(4), deserTags = matchRecord.get(5); assertEquals(deserTitle, thread.getQuestion().getTitle()); assertEquals(deserBody, thread.getQuestion().getBody()); assertEquals(deserFileName, binfileName); assertEquals(deserTags, thread.getConcatenatedTagsText()); } }