List of usage examples for org.apache.commons.csv CSVParser getHeaderMap
public Map<String, Integer> getHeaderMap()
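getHeaderMap() returns a copy of the parser's header map: the keys are column names, the values are zero-based column indices, and the result is null when the format defines no header. Before the project examples below, here is a minimal, self-contained sketch of typical usage; the class name GetHeaderMapExample and the file name example.csv are illustrative placeholders, not taken from any example on this page.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;

public class GetHeaderMapExample {
    public static void main(String[] args) throws Exception {
        // "example.csv" is a placeholder path; the first record is read as the header.
        try (Reader reader = Files.newBufferedReader(Paths.get("example.csv"));
                CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withFirstRecordAsHeader())) {
            // Column name -> zero-based column index; null if no header is configured.
            Map<String, Integer> headerMap = parser.getHeaderMap();
            for (Map.Entry<String, Integer> entry : headerMap.entrySet()) {
                System.out.println(entry.getValue() + " -> " + entry.getKey());
            }
        }
    }
}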
From source file:com.ge.research.semtk.load.dataset.CSVDataset.java
/**
 * Initialize
 * @param path the CSV file path
 * @param headers the headers needed for this dataset
 * @throws Exception
 */
private void initialize(String path, String[] headers) throws Exception {
    this.csvPath = path;
    CSVParser parser = getParser(new FileReader(path));
    this.recordIterator = parser.iterator();
    this.headers = headers;

    // confirm that headers passed in are available in the CSVParser (case-insensitive)
    boolean found;
    for (String header : headers) {
        found = false;
        Set<String> parserHeaders = parser.getHeaderMap().keySet();
        for (String parserHeader : parserHeaders) {
            if (parserHeader.equalsIgnoreCase(header)) {
                found = true;
                break;
            }
        }
        if (!found) {
            throw new Exception("Header '" + header + "' not found in CSV file");
        }
    }

    // print all the headers we find
    Set<String> parserHeaders = parser.getHeaderMap().keySet();
}
From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
/**
 * Create a header map to the CSV file, but imposes naming conventions on the column names.
 */
private void setVariableAttributes() {
    VariableAttributes variableAttributes = null;
    int position = 0;
    Reader reader = null;
    CSVParser parser = null;
    VariableName tempName = null;
    try {
        reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8");
        parser = new CSVParser(reader, dataFileFormat.withHeader());

        if (hasHeader) {
            Map<String, Integer> csvMap = parser.getHeaderMap();
            for (String s : csvMap.keySet()) {
                variableAttributes = new VariableAttributes(new VariableName(s), new VariableLabel(""),
                        DataType.INTEGER, position);
                variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                position++;
            }
        } else {
            Iterator<CSVRecord> iter = parser.iterator();
            CSVRecord csvRecord = iter.next();
            for (int i = 0; i < csvRecord.size(); i++) {
                variableAttributes = new VariableAttributes(new VariableName("v" + (i + 1)),
                        new VariableLabel(""), DataType.INTEGER, position);
                variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                position++;
            }
        }
    } catch (IOException ex) {
        theException = ex;
    } finally {
        try {
            if (parser != null)
                parser.close();
            if (reader != null)
                reader.close();
        } catch (IOException ex) {
            theException = ex;
        }
    }
}
From source file:br.ufg.calendario.components.EventoBean.java
public void uploadEvento(FileUploadEvent event) {
    Map<String, Object> requestMap = FacesContext.getCurrentInstance().getExternalContext().getRequestMap();
    FacesMessage msg;
    boolean saveStatus = false;
    UploadedFile arquivo = event.getFile();
    try {
        InputStream arquivoReader = arquivo.getInputstream();
        Charset charset = Charset.forName("UTF-8");
        CharsetDecoder decoder = charset.newDecoder();
        Reader reader = new InputStreamReader(arquivoReader, decoder);
        CSVParser parser = new CSVParser(reader,
                CSVFormat.DEFAULT.withHeader().withDelimiter(configBean.getDelimiter()));
        SimpleDateFormat dateFormatter = new SimpleDateFormat(configBean.getDateFormat());
        for (Entry<String, Integer> entry : parser.getHeaderMap().entrySet()) {
            System.out.format("header: %s - %d\n", entry.getKey(), entry.getValue());
        }
        Integer ano;
        Calendario cal = null;
        List<Regional> regionais = regionalDao.listar();
        List<Interessado> interessados = interessadoDao.listar();
        for (CSVRecord record : parser) {
            // add a Calendario entity (select box) to the "import events" screen.
            ano = Integer.parseInt(record.get(0));
            Date dataInicio = dateFormatter.parse(record.get(1));
            Date dataTermino = dateFormatter.parse(record.get(2));
            String assunto = record.get(3);
            String descricao = record.get(4);
            String[] interessadoArray = record.get(5).split(configBean.getRegexSplitter());
            String[] regionalArray = record.get(6).split(configBean.getRegexSplitter());
            boolean aprovado = record.get(7) != null && record.get(7).trim().toUpperCase().equals("S");
            if (cal == null) {
                // fetch only once
                cal = calendarioDao.buscar(ano);
            }
            Set<Interessado> interessadoSet = new HashSet();
            for (String interessado : interessadoArray) {
                if (!interessado.isEmpty()) {
                    for (Interessado i : interessados) {
                        if (i.getNome().equals(interessado.trim())) {
                            interessadoSet.add(i);
                        }
                    }
                }
            }
            Set<Regional> regionalSet = new HashSet();
            for (String regional : regionalArray) {
                if (!regional.isEmpty()) {
                    for (Regional r : regionais) {
                        if (r.getNome().equals(regional.trim())) {
                            regionalSet.add(r);
                        }
                    }
                }
            }
            Evento evt = new Evento(assunto, dataInicio, dataTermino, descricao, cal, regionalSet,
                    interessadoSet, aprovado);
            eventosImportados.add(evt);
        }
    } catch (IOException | ParseException | ArrayIndexOutOfBoundsException | NullPointerException e) {
        System.out.println("erro: " + e.getMessage());
    }
    System.out.println("arquivo enviado: " + arquivo.getFileName());
    msg = new FacesMessage(FacesMessage.SEVERITY_INFO, "info", LocaleBean.getMessage("arquivoEnviado"));
    FacesContext.getCurrentInstance().addMessage(null, msg);
    RequestContext.getCurrentInstance().addCallbackParam("resultado", saveStatus);
}
From source file:canreg.client.gui.components.PreviewFilePanel.java
/**
 *
 */
@Action
public void previewAction() {
    // show the contents of the file
    BufferedReader br = null;
    try {
        changeFile();
        // numberOfRecordsTextField.setText(""+(canreg.common.Tools.numberOfLinesInFile(inFile.getAbsolutePath())-1));
        FileInputStream fis = new FileInputStream(inFile);
        br = new BufferedReader(new InputStreamReader(fis, (Charset) charsetsComboBox.getSelectedItem()));
        CSVFormat csvFormat = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(getSeparator());
        CSVParser csvParser = new CSVParser(br, csvFormat);

        int linesToRead = Globals.NUMBER_OF_LINES_IN_IMPORT_PREVIEW;
        int numberOfLinesRead = 0;
        Vector<Vector<String>> data = new Vector<Vector<String>>();

        String[] headers = csvParser.getHeaderMap().keySet().toArray(new String[0]);
        for (CSVRecord csvRecord : csvParser) {
            csvRecord.toMap();
            Vector vec = new Vector();
            Iterator<String> iterator = csvRecord.iterator();
            while (iterator.hasNext()) {
                vec.add(iterator.next());
            }
            data.add(vec);
            numberOfLinesRead++;
            if (numberOfLinesRead >= linesToRead) {
                break;
            }
        }
        numberOfRecordsShownTextField.setText(numberOfLinesRead + "");

        // previewTextArea.setText(headers + "\n" + dataText);
        // previewTextArea.setCaretPosition(0);
        previewPanel.setVisible(true);
        Vector columnNames = new Vector(Arrays.asList(headers));
        previewTable.setModel(new DefaultTableModel(data, columnNames));
    } catch (FileNotFoundException fileNotFoundException) {
        JOptionPane.showInternalMessageDialog(CanRegClientApp.getApplication().getMainFrame().getContentPane(),
                java.util.ResourceBundle.getBundle("canreg/client/gui/dataentry/resources/ImportView")
                        .getString("COULD_NOT_PREVIEW_FILE:") + " \'" + fileNameTextField.getText().trim()
                        + "\'.",
                java.util.ResourceBundle.getBundle("canreg/client/gui/dataentry/resources/ImportView")
                        .getString("ERROR"),
                JOptionPane.ERROR_MESSAGE);
        Logger.getLogger(PreviewFilePanel.class.getName()).log(Level.SEVERE, null, fileNotFoundException);
    } catch (IOException ex) {
        Logger.getLogger(PreviewFilePanel.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        try {
            if (br != null) {
                br.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(PreviewFilePanel.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException { List<DataVariable> variableList = new ArrayList<>(); CSVParser parser = new CSVParser(csvReader, inFormat.withHeader()); Map<String, Integer> headers = parser.getHeaderMap(); int i = 0;//from w w w.ja va 2 s . c o m for (String varName : headers.keySet()) { if (varName == null || varName.isEmpty()) { // TODO: // Add a sensible variable name validation algorithm. // -- L.A. 4.0 alpha 1 throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader")); } DataVariable dv = new DataVariable(); dv.setName(varName); dv.setLabel(varName); dv.setInvalidRanges(new ArrayList<>()); dv.setSummaryStatistics(new ArrayList<>()); dv.setUnf("UNF:6:NOTCALCULATED"); dv.setCategories(new ArrayList<>()); variableList.add(dv); dv.setTypeCharacter(); dv.setIntervalDiscrete(); dv.setFileOrder(i); dv.setDataTable(dataTable); i++; } dataTable.setVarQuantity((long) variableList.size()); dataTable.setDataVariables(variableList); boolean[] isNumericVariable = new boolean[headers.size()]; boolean[] isIntegerVariable = new boolean[headers.size()]; boolean[] isTimeVariable = new boolean[headers.size()]; boolean[] isDateVariable = new boolean[headers.size()]; for (i = 0; i < headers.size(); i++) { // OK, let's assume that every variable is numeric; // but we'll go through the file and examine every value; the // moment we find a value that's not a legit numeric one, we'll // assume that it is in fact a String. isNumericVariable[i] = true; isIntegerVariable[i] = true; isDateVariable[i] = true; isTimeVariable[i] = true; } // First, "learning" pass. // (we'll save the incoming stream in another temp file:) SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()]; SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()]; File firstPassTempFile = File.createTempFile("firstpass-", ".csv"); try (CSVPrinter csvFilePrinter = new CSVPrinter( // TODO allow other parsers of tabular data to use this parser by changin inFormat new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) { //Write headers csvFilePrinter.printRecord(headers.keySet()); for (CSVRecord record : parser.getRecords()) { // Checks if #records = #columns in header if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); isIntegerVariable[i] = isIntegerVariable[i] && varString != null && (varString.isEmpty() || varString.equals("null") || (firstNumCharSet.contains(varString.charAt(0)) && StringUtils.isNumeric(varString.substring(1)))); if (isNumericVariable[i]) { // If variable might be "numeric" test to see if this value is a parsable number: if (varString != null && !varString.isEmpty()) { boolean isNumeric = false; boolean isInteger = false; if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA") || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf") || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) { continue; } else { try { Double testDoubleValue = new Double(varString); continue; } catch (NumberFormatException ex) { // the token failed to parse as a double // so the column is a string variable. 
} } isNumericVariable[i] = false; } } // If this is not a numeric column, see if it is a date collumn // by parsing the cell as a date or date-time value: if (!isNumericVariable[i]) { Date dateResult = null; if (isTimeVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isTime = false; if (selectedDateTimeFormat[i] != null) { ParsePosition pos = new ParsePosition(0); dateResult = selectedDateTimeFormat[i].parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; } } else { for (SimpleDateFormat format : TIME_FORMATS) { ParsePosition pos = new ParsePosition(0); dateResult = format.parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; selectedDateTimeFormat[i] = format; break; } } } if (!isTime) { isTimeVariable[i] = false; // if the token didn't parse as a time value, // we will still try to parse it as a date, below. // unless this column is NOT a date. } else { // And if it is a time value, we are going to assume it's // NOT a date. isDateVariable[i] = false; } } } if (isDateVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isDate = false; // TODO: // Strictly speaking, we should be doing the same thing // here as with the time formats above; select the // first one that works, then insist that all the // other values in this column match it... but we // only have one, as of now, so it should be ok. // -- L.A. 4.0 beta for (SimpleDateFormat format : DATE_FORMATS) { // Strict parsing - it will throw an // exception if it doesn't parse! format.setLenient(false); try { format.parse(varString); isDate = true; selectedDateFormat[i] = format; break; } catch (ParseException ex) { //Do nothing } } isDateVariable[i] = isDate; } } } } csvFilePrinter.printRecord(record); } } dataTable.setCaseQuantity(parser.getRecordNumber()); parser.close(); csvReader.close(); // Re-type the variables that we've determined are numerics: for (i = 0; i < headers.size(); i++) { if (isNumericVariable[i]) { dataTable.getDataVariables().get(i).setTypeNumeric(); if (isIntegerVariable[i]) { dataTable.getDataVariables().get(i).setIntervalDiscrete(); } else { dataTable.getDataVariables().get(i).setIntervalContinuous(); } } else if (isDateVariable[i] && selectedDateFormat[i] != null) { // Dates are still Strings, i.e., they are "character" and "discrete"; // But we add special format values for them: dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("date"); } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) { // Same for time values: dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("time"); } } // Second, final pass. 
try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; for (CSVRecord record : parser) { if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); if (isNumericVariable[i]) { if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) { // Missing value - represented as an empty string in // the final tab file caseRow[i] = ""; } else if (varString.equalsIgnoreCase("NaN")) { // "Not a Number" special value: caseRow[i] = "NaN"; } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) { // Positive infinity: caseRow[i] = "Inf"; } else if (varString.equalsIgnoreCase("-Inf")) { // Negative infinity: caseRow[i] = "-Inf"; } else if (varString.equalsIgnoreCase("null")) { // By request from Gus - "NULL" is recognized as a // numeric zero: caseRow[i] = isIntegerVariable[i] ? "0" : "0.0"; } else { /* No re-formatting is done on any other numeric values. * We'll save them as they were, for archival purposes. * The alternative solution - formatting in sci. notation * is commented-out below. */ caseRow[i] = varString; /* if (isIntegerVariable[i]) { try { Integer testIntegerValue = new Integer(varString); caseRow[i] = testIntegerValue.toString(); } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as an integer in the first pass! (?)"); } } else { try { Double testDoubleValue = new Double(varString); if (testDoubleValue.equals(0.0)) { caseRow[i] = "0.0"; } else { // One possible implementation: // // Round our fractional values to 15 digits // (minimum number of digits of precision guaranteed by // type Double) and format the resulting representations // in a IEEE 754-like "scientific notation" - for ex., // 753.24 will be encoded as 7.5324e2 BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext); caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal); // Strip meaningless zeros and extra + signs: caseRow[i] = caseRow[i].replaceFirst("00*e", "e"); caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e"); caseRow[i] = caseRow[i].replaceFirst("e\\+00", ""); caseRow[i] = caseRow[i].replaceFirst("^\\+", ""); } } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)"); } } */ } } else if (isTimeVariable[i] || isDateVariable[i]) { // Time and Dates are stored NOT quoted (don't ask). if (varString != null) { // Dealing with quotes: // remove the leading and trailing quotes, if present: varString = varString.replaceFirst("^\"*", ""); varString = varString.replaceFirst("\"*$", ""); caseRow[i] = varString; } else { caseRow[i] = ""; } } else { // Treat as a String: // Strings are stored in tab files quoted; // Missing values are stored as an empty string // between two tabs (or one tab and the new line); // Empty strings stored as "" (quoted empty string). 
// For the purposes of this CSV ingest reader, we are going // to assume that all the empty strings in the file are // indeed empty strings, and NOT missing values: if (varString != null) { // escape the quotes, newlines, and tabs: varString = varString.replace("\"", "\\\""); varString = varString.replace("\n", "\\n"); varString = varString.replace("\t", "\\t"); // final pair of quotes: varString = "\"" + varString + "\""; caseRow[i] = varString; } else { caseRow[i] = "\"\""; } } } finalOut.println(StringUtils.join(caseRow, "\t")); } } long linecount = parser.getRecordNumber(); finalOut.close(); parser.close(); dbglog.fine("Tmp File: " + firstPassTempFile); // Firstpass file is deleted to prevent tmp from filling up. firstPassTempFile.delete(); if (dataTable.getCaseQuantity().intValue() != linecount) { List<String> args = Arrays .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args)); } return (int) linecount; }
From source file:com.hurence.logisland.service.cache.CSVKeyValueCacheService.java
@Override
// @OnEnabled
public void init(ControllerServiceInitializationContext context) throws InitializationException {
    super.init(context);
    try {
        if (context.getPropertyValue(DATABASE_FILE_URI).isSet()) {
            dbUri = context.getPropertyValue(DATABASE_FILE_URI).asString();
        }

        if (context.getPropertyValue(DATABASE_FILE_PATH).isSet()) {
            dbPath = context.getPropertyValue(DATABASE_FILE_PATH).asString();
        }

        if ((dbUri == null) && (dbPath == null)) {
            throw new Exception(
                    "You must declare " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        InputStream is = null;
        if (dbUri != null) {
            logger.info("opening csv database from hdfs : " + dbUri);
            is = initFromUri(dbUri);
        }

        if (dbPath != null) {
            logger.info("opening csv database from local fs : " + dbPath);
            is = initFromPath(context, dbPath);
        }

        if (is == null) {
            throw new InitializationException("Something went wrong while initializing csv db from "
                    + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        // final Reader reader = new InputStreamReader(is);
        CSVFormat format = CSVFormat.DEFAULT;
        if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL.getValue())) {
            format = CSVFormat.EXCEL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL_FR.getValue())) {
            format = CSVFormat.EXCEL.withDelimiter(';');
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_MYSQL.getValue())) {
            format = CSVFormat.MYSQL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_RFC4180.getValue())) {
            format = CSVFormat.RFC4180;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_TDF.getValue())) {
            format = CSVFormat.TDF;
        }

        if (context.getPropertyValue(CSV_HEADER).isSet()) {
            String[] columnNames = context.getPropertyValue(CSV_HEADER).asString().split(",");
            for (String name : columnNames) {
                headers.get().put(name, "string");
            }
            format = format.withHeader(columnNames);
        } else if (context.getPropertyValue(FIRST_LINE_HEADER).isSet()) {
            format = format.withFirstRecordAsHeader();
        } else {
            throw new InitializationException("unable to get headers from somewhere");
        }

        Charset charset = Charset.forName("UTF-8");
        if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
            String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
            charset = Charset.forName(encoding);
        }

        rowKey = context.getPropertyValue(ROW_KEY).asString();
        CSVParser parser = CSVParser.parse(is, charset, format); //new CSVParser(reader, format);

        /*
         * CSVParser parser = null;
         * if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
         *     String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
         *     parser = CSVParser.parse(reader, Charset.forName(encoding), format);
         * } else {
         *     parser = CSVParser.parse(reader, format);
         * }
         */
        long count = 0;
        try {
            final Set<String> columnNames = parser.getHeaderMap().keySet();
            for (final CSVRecord record : parser) {
                Record logislandRecord = new StandardRecord();
                for (final String column : columnNames) {
                    logislandRecord.setStringField(column, record.get(column));
                }
                set(logislandRecord.getField(rowKey).asString(), logislandRecord);
                count++;
            }
        } finally {
            logger.info("successfully loaded " + count + " records from CSV file");
            parser.close();
            is.close();
        }
    } catch (Exception e) {
        getLogger().error("Could not load database file: {}", new Object[] { e.getMessage() });
        throw new InitializationException(e);
    }
}
From source file:edu.harvard.mcz.imagecapture.loader.JobVerbatimFieldLoad.java
@Override public void start() { startDateTime = new Date(); Singleton.getSingletonInstance().getJobList().addJob((RunnableJob) this); runStatus = RunStatus.STATUS_RUNNING; String selectedFilename = ""; if (file == null) { final JFileChooser fileChooser = new JFileChooser(); fileChooser.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES); if (Singleton.getSingletonInstance().getProperties().getProperties() .getProperty(ImageCaptureProperties.KEY_LASTLOADPATH) != null) { fileChooser.setCurrentDirectory(new File(Singleton.getSingletonInstance().getProperties() .getProperties().getProperty(ImageCaptureProperties.KEY_LASTLOADPATH))); }/* w w w .java 2s .co m*/ int returnValue = fileChooser.showOpenDialog(Singleton.getSingletonInstance().getMainFrame()); if (returnValue == JFileChooser.APPROVE_OPTION) { file = fileChooser.getSelectedFile(); } } if (file != null) { log.debug("Selected file to load: " + file.getName() + "."); if (file.exists() && file.isFile() && file.canRead()) { // Save location Singleton.getSingletonInstance().getProperties().getProperties() .setProperty(ImageCaptureProperties.KEY_LASTLOADPATH, file.getPath()); selectedFilename = file.getName(); String[] headers = new String[] {}; CSVFormat csvFormat = CSVFormat.DEFAULT.withHeader(headers); int rows = 0; try { rows = readRows(file, csvFormat); } catch (FileNotFoundException e) { JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), "Unable to load data, file not found: " + e.getMessage(), "Error: File Not Found", JOptionPane.OK_OPTION); errors.append("File not found ").append(e.getMessage()).append("\n"); log.error(e.getMessage(), e); } catch (IOException e) { errors.append("Error loading csv format, trying tab delimited: ").append(e.getMessage()) .append("\n"); log.debug(e.getMessage()); try { // try reading as tab delimited format, if successful, use that format. CSVFormat tabFormat = CSVFormat.newFormat('\t').withIgnoreSurroundingSpaces(true) .withHeader(headers).withQuote('"'); rows = readRows(file, tabFormat); csvFormat = tabFormat; } catch (IOException e1) { errors.append("Error Loading data: ").append(e1.getMessage()).append("\n"); log.error(e.getMessage(), e1); } } try { Reader reader = new FileReader(file); CSVParser csvParser = new CSVParser(reader, csvFormat); Map<String, Integer> csvHeader = csvParser.getHeaderMap(); headers = new String[csvHeader.size()]; int i = 0; for (String header : csvHeader.keySet()) { headers[i++] = header; log.debug(header); } boolean okToRun = true; //TODO: Work picking/checking responsibility into a FieldLoaderWizard List<String> headerList = Arrays.asList(headers); if (!headerList.contains("barcode")) { log.error("Input file " + file.getName() + " header does not contain required field 'barcode'."); // no barcode field, we can't match the input to specimen records. errors.append("Field \"barcode\" not found in csv file headers. 
Unable to load data.") .append("\n"); okToRun = false; } if (okToRun) { Iterator<CSVRecord> iterator = csvParser.iterator(); FieldLoader fl = new FieldLoader(); if (headerList.size() == 3 && headerList.contains("verbatimUnclassifiedText") && headerList.contains("questions") && headerList.contains("barcode")) { log.debug("Input file matches case 1: Unclassified text only."); // Allowed case 1a: unclassified text only int confirm = JOptionPane.showConfirmDialog( Singleton.getSingletonInstance().getMainFrame(), "Confirm load from file " + selectedFilename + " (" + rows + " rows) with just barcode and verbatimUnclassifiedText", "Verbatim unclassified Field found for load", JOptionPane.OK_CANCEL_OPTION); if (confirm == JOptionPane.OK_OPTION) { String barcode = ""; int lineNumber = 0; while (iterator.hasNext()) { lineNumber++; counter.incrementSpecimens(); CSVRecord record = iterator.next(); try { String verbatimUnclassifiedText = record.get("verbatimUnclassifiedText"); barcode = record.get("barcode"); String questions = record.get("questions"); fl.load(barcode, verbatimUnclassifiedText, questions, true); counter.incrementSpecimensUpdated(); } catch (IllegalArgumentException e) { RunnableJobError error = new RunnableJobError(file.getName(), barcode, Integer.toString(lineNumber), e.getClass().getSimpleName(), e, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(error); log.error(e.getMessage(), e); } catch (LoadException e) { RunnableJobError error = new RunnableJobError(file.getName(), barcode, Integer.toString(lineNumber), e.getClass().getSimpleName(), e, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(error); log.error(e.getMessage(), e); } percentComplete = (int) ((lineNumber * 100f) / rows); this.setPercentComplete(percentComplete); } } else { errors.append("Load canceled by user.").append("\n"); } } else if (headerList.size() == 4 && headerList.contains("verbatimUnclassifiedText") && headerList.contains("questions") && headerList.contains("barcode") && headerList.contains("verbatimClusterIdentifier")) { log.debug( "Input file matches case 1: Unclassified text only (with cluster identifier)."); // Allowed case 1b: unclassified text only (including cluster identifier) int confirm = JOptionPane.showConfirmDialog( Singleton.getSingletonInstance().getMainFrame(), "Confirm load from file " + selectedFilename + " (" + rows + " rows) with just barcode and verbatimUnclassifiedText", "Verbatim unclassified Field found for load", JOptionPane.OK_CANCEL_OPTION); if (confirm == JOptionPane.OK_OPTION) { String barcode = ""; int lineNumber = 0; while (iterator.hasNext()) { lineNumber++; counter.incrementSpecimens(); CSVRecord record = iterator.next(); try { String verbatimUnclassifiedText = record.get("verbatimUnclassifiedText"); String verbatimClusterIdentifier = record.get("verbatimClusterIdentifier"); barcode = record.get("barcode"); String questions = record.get("questions"); fl.load(barcode, verbatimUnclassifiedText, verbatimClusterIdentifier, questions, true); counter.incrementSpecimensUpdated(); } catch (IllegalArgumentException e) { RunnableJobError error = new RunnableJobError(file.getName(), barcode, Integer.toString(lineNumber), e.getClass().getSimpleName(), e, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(error); log.error(e.getMessage(), e); } catch (LoadException e) { RunnableJobError error = new RunnableJobError(file.getName(), barcode, Integer.toString(lineNumber), e.getClass().getSimpleName(), e, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(error); 
log.error(e.getMessage(), e); } percentComplete = (int) ((lineNumber * 100f) / rows); this.setPercentComplete(percentComplete); } } else { errors.append("Load canceled by user.").append("\n"); } } else if (headerList.size() == 8 && headerList.contains("verbatimUnclassifiedText") && headerList.contains("questions") && headerList.contains("barcode") && headerList.contains("verbatimLocality") && headerList.contains("verbatimDate") && headerList.contains("verbatimNumbers") && headerList.contains("verbatimCollector") && headerList.contains("verbatimCollection")) { // Allowed case two, transcription into verbatim fields, must be exact list of all // verbatim fields, not including cluster identifier or other metadata. log.debug("Input file matches case 2: Full list of verbatim fields."); int confirm = JOptionPane.showConfirmDialog( Singleton.getSingletonInstance().getMainFrame(), "Confirm load from file " + selectedFilename + " (" + rows + " rows) with just barcode and verbatim fields.", "Verbatim Fields found for load", JOptionPane.OK_CANCEL_OPTION); if (confirm == JOptionPane.OK_OPTION) { String barcode = ""; int lineNumber = 0; while (iterator.hasNext()) { lineNumber++; counter.incrementSpecimens(); CSVRecord record = iterator.next(); try { String verbatimLocality = record.get("verbatimLocality"); String verbatimDate = record.get("verbatimDate"); String verbatimCollector = record.get("verbatimCollector"); String verbatimCollection = record.get("verbatimCollection"); String verbatimNumbers = record.get("verbatimNumbers"); String verbatimUnclasifiedText = record.get("verbatimUnclassifiedText"); barcode = record.get("barcode"); String questions = record.get("questions"); fl.load(barcode, verbatimLocality, verbatimDate, verbatimCollector, verbatimCollection, verbatimNumbers, verbatimUnclasifiedText, questions); counter.incrementSpecimensUpdated(); } catch (IllegalArgumentException e) { RunnableJobError error = new RunnableJobError(file.getName(), barcode, Integer.toString(lineNumber), e.getClass().getSimpleName(), e, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(error); log.error(e.getMessage(), e); } catch (LoadException e) { RunnableJobError error = new RunnableJobError(file.getName(), barcode, Integer.toString(lineNumber), e.getClass().getSimpleName(), e, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(error); log.error(e.getMessage(), e); } percentComplete = (int) ((lineNumber * 100f) / rows); this.setPercentComplete(percentComplete); } } else { errors.append("Load canceled by user.").append("\n"); } } else { // allowed case three, transcription into arbitrary sets verbatim or other fields log.debug("Input file case 3: Arbitrary set of fields."); // Check column headers before starting run. 
boolean headersOK = false; try { HeaderCheckResult headerCheck = fl.checkHeaderList(headerList); if (headerCheck.isResult()) { int confirm = JOptionPane.showConfirmDialog( Singleton.getSingletonInstance().getMainFrame(), "Confirm load from file " + selectedFilename + " (" + rows + " rows) with headers: \n" + headerCheck.getMessage().replaceAll(":", ":\n"), "Fields found for load", JOptionPane.OK_CANCEL_OPTION); if (confirm == JOptionPane.OK_OPTION) { headersOK = true; } else { errors.append("Load canceled by user.").append("\n"); } } else { int confirm = JOptionPane.showConfirmDialog( Singleton.getSingletonInstance().getMainFrame(), "Problem found with headers in file, try to load anyway?\nHeaders: \n" + headerCheck.getMessage().replaceAll(":", ":\n"), "Problem in fields for load", JOptionPane.OK_CANCEL_OPTION); if (confirm == JOptionPane.OK_OPTION) { headersOK = true; } else { errors.append("Load canceled by user.").append("\n"); } } } catch (LoadException e) { errors.append("Error loading data: \n").append(e.getMessage()).append("\n"); JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), e.getMessage().replaceAll(":", ":\n"), "Error Loading Data: Problem Fields", JOptionPane.ERROR_MESSAGE); log.error(e.getMessage(), e); } if (headersOK) { int lineNumber = 0; while (iterator.hasNext()) { lineNumber++; Map<String, String> data = new HashMap<String, String>(); CSVRecord record = iterator.next(); String barcode = record.get("barcode"); Iterator<String> hi = headerList.iterator(); boolean containsNonVerbatim = false; while (hi.hasNext()) { String header = hi.next(); // Skip any fields prefixed by the underscore character _ if (!header.equals("barcode") && !header.startsWith("_")) { data.put(header, record.get(header)); if (!header.equals("questions") && MetadataRetriever.isFieldExternallyUpdatable(Specimen.class, header) && MetadataRetriever.isFieldVerbatim(Specimen.class, header)) { containsNonVerbatim = true; } } } if (data.size() > 0) { try { boolean updated = false; if (containsNonVerbatim) { updated = fl.loadFromMap(barcode, data, WorkFlowStatus.STAGE_CLASSIFIED, true); } else { updated = fl.loadFromMap(barcode, data, WorkFlowStatus.STAGE_VERBATIM, true); } counter.incrementSpecimens(); if (updated) { counter.incrementSpecimensUpdated(); } } catch (HibernateException e1) { // Catch (should just be development) problems with the underlying query StringBuilder message = new StringBuilder(); message.append("Query Error loading row (").append(lineNumber) .append(")[").append(barcode).append("]") .append(e1.getMessage()); RunnableJobError err = new RunnableJobError(selectedFilename, barcode, Integer.toString(lineNumber), e1.getMessage(), e1, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(err); log.error(e1.getMessage(), e1); } catch (LoadException e) { StringBuilder message = new StringBuilder(); message.append("Error loading row (").append(lineNumber).append(")[") .append(barcode).append("]").append(e.getMessage()); RunnableJobError err = new RunnableJobError(selectedFilename, barcode, Integer.toString(lineNumber), e.getMessage(), e, RunnableJobError.TYPE_LOAD_FAILED); counter.appendError(err); // errors.append(message.append("\n").toString()); log.error(e.getMessage(), e); } } percentComplete = (int) ((lineNumber * 100f) / rows); this.setPercentComplete(percentComplete); } } else { String message = "Can't load data, problem with headers."; errors.append(message).append("\n"); log.error(message); } } } csvParser.close(); reader.close(); } catch 
(FileNotFoundException e) { JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), "Unable to load data, file not found: " + e.getMessage(), "Error: File Not Found", JOptionPane.OK_OPTION); errors.append("File not found ").append(e.getMessage()).append("\n"); log.error(e.getMessage(), e); } catch (IOException e) { errors.append("Error Loading data: ").append(e.getMessage()).append("\n"); log.error(e.getMessage(), e); } } } else { //TODO: handle error condition log.error("File selection cancelled by user."); } report(selectedFilename); done(); }
From source file:ca.uhn.fhir.jpa.term.TerminologyLoaderSvc.java
private void iterateOverZipFile(List<byte[]> theZipBytes, String fileNamePart, IRecordHandler handler,
        char theDelimiter, QuoteMode theQuoteMode) {
    boolean found = false;

    for (byte[] nextZipBytes : theZipBytes) {
        ZipInputStream zis = new ZipInputStream(
                new BufferedInputStream(new ByteArrayInputStream(nextZipBytes)));
        try {
            for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null;) {
                ZippedFileInputStream inputStream = new ZippedFileInputStream(zis);

                String nextFilename = nextEntry.getName();
                if (nextFilename.contains(fileNamePart)) {
                    ourLog.info("Processing file {}", nextFilename);
                    found = true;

                    Reader reader = null;
                    CSVParser parsed = null;
                    try {
                        reader = new InputStreamReader(zis, Charsets.UTF_8);
                        CSVFormat format = CSVFormat.newFormat(theDelimiter).withFirstRecordAsHeader();
                        if (theQuoteMode != null) {
                            format = format.withQuote('"').withQuoteMode(theQuoteMode);
                        }
                        parsed = new CSVParser(reader, format);
                        Iterator<CSVRecord> iter = parsed.iterator();
                        ourLog.debug("Header map: {}", parsed.getHeaderMap());

                        int count = 0;
                        int logIncrement = LOG_INCREMENT;
                        int nextLoggedCount = 0;
                        while (iter.hasNext()) {
                            CSVRecord nextRecord = iter.next();
                            handler.accept(nextRecord);
                            count++;
                            if (count >= nextLoggedCount) {
                                ourLog.info(" * Processed {} records in {}", count, nextFilename);
                                nextLoggedCount += logIncrement;
                            }
                        }
                    } catch (IOException e) {
                        throw new InternalErrorException(e);
                    }
                }
            }
        } catch (IOException e) {
            throw new InternalErrorException(e);
        } finally {
            IOUtils.closeQuietly(zis);
        }
    }

    // This should always be true, but just in case we've introduced a bug...
    Validate.isTrue(found);
}
From source file:canreg.client.gui.dataentry.ImportView.java
/**
 *
 */
@Action
public void previewAction() {
    // show the contents of the file
    BufferedReader br = null;
    try {
        changeFile();
        // numberOfRecordsTextField.setText(""+(canreg.common.Tools.numberOfLinesInFile(inFile.getAbsolutePath())-1));
        FileInputStream fis = new FileInputStream(inFile);
        br = new BufferedReader(new InputStreamReader(fis, (Charset) charsetsComboBox.getSelectedItem()));
        CSVFormat csvFormat = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(getSeparator());
        CSVParser csvParser = new CSVParser(br, csvFormat);

        int linesToRead = Globals.NUMBER_OF_LINES_IN_IMPORT_PREVIEW;
        int numberOfLinesRead = 0;
        Vector<Vector<String>> data = new Vector<Vector<String>>();

        String[] headers = csvParser.getHeaderMap().keySet().toArray(new String[0]);
        for (CSVRecord csvRecord : csvParser) {
            csvRecord.toMap();
            Vector vec = new Vector();
            Iterator<String> iterator = csvRecord.iterator();
            while (iterator.hasNext()) {
                vec.add(iterator.next());
            }
            data.add(vec);
            numberOfLinesRead++;
            if (numberOfLinesRead >= linesToRead) {
                break;
            }
        }
        numberOfRecordsShownTextField.setText(numberOfLinesRead + "");

        // previewTextArea.setText(headers + "\n" + dataText);
        // previewTextArea.setCaretPosition(0);
        previewPanel.setVisible(true);
        Vector columnNames = new Vector(Arrays.asList(headers));
        previewTable.setModel(new DefaultTableModel(data, columnNames));
    } catch (FileNotFoundException fileNotFoundException) {
        JOptionPane.showInternalMessageDialog(CanRegClientApp.getApplication().getMainFrame().getContentPane(),
                java.util.ResourceBundle.getBundle("canreg/client/gui/dataentry/resources/ImportView")
                        .getString("COULD_NOT_PREVIEW_FILE:") + " \'" + fileNameTextField.getText().trim()
                        + "\'.",
                java.util.ResourceBundle.getBundle("canreg/client/gui/dataentry/resources/ImportView")
                        .getString("ERROR"),
                JOptionPane.ERROR_MESSAGE);
        Logger.getLogger(ImportView.class.getName()).log(Level.SEVERE, null, fileNotFoundException);
    } catch (IOException ex) {
        JOptionPane.showInternalMessageDialog(CanRegClientApp.getApplication().getMainFrame().getContentPane(),
                java.util.ResourceBundle.getBundle("canreg/client/gui/dataentry/resources/ImportView")
                        .getString("COULD_NOT_PREVIEW_FILE:") + " \'" + fileNameTextField.getText().trim()
                        + "\'.",
                java.util.ResourceBundle.getBundle("canreg/client/gui/dataentry/resources/ImportView")
                        .getString("ERROR"),
                JOptionPane.ERROR_MESSAGE);
        Logger.getLogger(ImportView.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        try {
            if (br != null) {
                br.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(ImportView.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:com.amazonaws.services.dynamodbv2.online.index.integration.tests.ViolationCorrectionTest.java
/**
 * Iterates through detection output file: first leave updates blank based on missing updates per key.
 * Once it has reached the missing update number, it removes the expected gsi values as per the specified
 * 'missingGsiExpectedHashValues'. Note that once blank number is reached, it also starts adding updates.
 * It then iterates over the rows again and adds values for Yes/No/Invalid in the delete column.
 * It returns all error records, if present. If not, it returns all records.
 */
private static List<List<String>> createCorrectionFile(final String detectionFile, final String correctionFile,
        final String gsiHashKeyName, final String gsiHashKeyType, final String gsiRangeKeyName,
        final String gsiRangeKeyType, final Map<String, String> tableHashToNewGsiHashValueMap,
        final Map<String, String> tableHashToNewGsiRangeValueMap, final int missingUpdatesPerKey,
        final int missingGsiExpectedHashValues, final int invalidValuesForDelete, final int numOfYesForDelete,
        final int numOfNoForDelete) throws IOException {
    List<List<String>> errorRecords = null;
    List<List<String>> allRecords = null;

    BufferedReader br = null;
    BufferedWriter bw = null;
    CSVParser parser = null;
    CSVPrinter csvPrinter = null;
    try {
        br = new BufferedReader(new FileReader(new File(detectionFile)));
        bw = new BufferedWriter(new FileWriter(new File(correctionFile)));
        parser = new CSVParser(br, TestUtils.csvFormat);
        csvPrinter = new CSVPrinter(bw, TestUtils.csvFormat);
        List<CSVRecord> detectorRecords = parser.getRecords();

        int hashMissingUpdates = 0;
        int rangeMissingUpdates = 0;
        int missingGsiExpectedHashValuesCurrent = 0;

        // Print Header
        Map<String, Integer> header = parser.getHeaderMap();
        csvPrinter.printRecord(header.keySet());

        allRecords = new ArrayList<List<String>>();
        for (CSVRecord csvRecord : detectorRecords) {
            List<String> newRecord = new ArrayList<String>();
            String tableHashKeyRecorded = csvRecord.get(ViolationRecord.TABLE_HASH_KEY);

            String hashKeyViolationType = null;
            if (gsiHashKeyName != null) {
                hashKeyViolationType = csvRecord.get(ViolationRecord.GSI_HASH_KEY_VIOLATION_TYPE);
            }
            String rangeKeyViolationType = null;
            if (gsiRangeKeyName != null) {
                rangeKeyViolationType = csvRecord.get(ViolationRecord.GSI_RANGE_KEY_VIOLATION_TYPE);
            }

            for (int i = 0; i < csvRecord.size(); i++) {
                newRecord.add(i, csvRecord.get(i));
            }

            String newGsiVal = null;
            if (hashKeyViolationType != null && (hashKeyViolationType.equals("Size Violation")
                    || hashKeyViolationType.equals("Type Violation"))) {
                if (hashMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    hashMissingUpdates++;
                    continue;
                }
                // Remove expected hash Values
                if (missingGsiExpectedHashValuesCurrent < missingGsiExpectedHashValues) {
                    newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY));
                    newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY), "");
                    missingGsiExpectedHashValuesCurrent++;
                }

                newRecord.remove((int) header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiHashKeyType, 4 /*length*/);
                newRecord.add(header.get(ViolationRecord.GSI_HASH_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiHashValueMap.put(tableHashKeyRecorded, newGsiVal);
            }
            if (rangeKeyViolationType != null && (rangeKeyViolationType.equals("Size Violation")
                    || rangeKeyViolationType.equals("Type Violation"))) {
                if (rangeMissingUpdates < missingUpdatesPerKey) {
                    allRecords.add(newRecord);
                    rangeMissingUpdates++;
                    continue;
                }

                newRecord.remove(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE));
                newGsiVal = getNewValue(gsiRangeKeyType, 4 /*length*/);
                newRecord.add(header.get(ViolationRecord.GSI_RANGE_KEY_UPDATE_VALUE), newGsiVal);
                tableHashToNewGsiRangeValueMap.put(tableHashKeyRecorded, newGsiVal);
            }
            allRecords.add(newRecord);
        }

        // Add 'Y' or 'N' for delete column
        if (numOfNoForDelete > 0 || numOfYesForDelete > 0 || invalidValuesForDelete > 0) {
            errorRecords = new ArrayList<List<String>>();
            int numOfYesAdded = 0;
            int numOfNoAdded = 0;
            int numOfInvalids = 0;
            for (List<String> record : allRecords) {
                if (numOfInvalids < invalidValuesForDelete) {
                    record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "xx");
                    numOfInvalids++;
                    errorRecords.add(record);
                    continue;
                }
                if (numOfYesAdded < numOfYesForDelete) {
                    record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "Y");
                    numOfYesAdded++;
                    continue;
                }
                if (numOfNoAdded < numOfNoForDelete) {
                    record.remove(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK));
                    record.add(header.get(ViolationRecord.GSI_CORRECTION_DELETE_BLANK), "N");
                    numOfNoAdded++;
                    continue;
                }
            }
        }

        // Add all records to file
        csvPrinter.printRecords(allRecords);
    } finally {
        br.close();
        bw.close();
        parser.close();
        csvPrinter.close();
    }

    if (errorRecords != null)
        return errorRecords;
    else
        return allRecords;
}