List of usage examples for org.apache.commons.csv.CSVParser#getHeaderMap()
public Map<String, Integer> getHeaderMap()
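getHeaderMap() returns a copy of the header map, keyed by column name with zero-based column indices as values, iterating in column order; it returns null when the format was not configured with a header. A minimal, self-contained sketch of the call (the file name data.csv and its columns are illustrative, not taken from the examples below):

import java.io.FileReader;
import java.io.Reader;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;

public class GetHeaderMapExample {
    public static void main(String[] args) throws Exception {
        // withHeader() with no arguments tells the parser to take the header
        // from the first record of the input.
        try (Reader reader = new FileReader("data.csv");
                CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(reader)) {
            // Keys are column names, values are zero-based column indices;
            // the returned map is a copy that iterates in column order.
            Map<String, Integer> headerMap = parser.getHeaderMap();
            for (Map.Entry<String, Integer> entry : headerMap.entrySet()) {
                System.out.println(entry.getValue() + " -> " + entry.getKey());
            }
        }
    }
}

The examples below show the same call in real projects, typically to recover column names for schema building, attribute generation, or header validation.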
From source file:net.tradelib.core.Series.java
static public Series fromCsv(String path, boolean header, DateTimeFormatter dtf, LocalTime lt)
        throws Exception {
    if (dtf == null) {
        if (lt == null)
            dtf = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
        else
            dtf = DateTimeFormatter.ISO_DATE;
    }

    // Parse and import the csv
    CSVFormat csvFmt = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreSurroundingSpaces();
    if (header)
        csvFmt = csvFmt.withHeader();
    CSVParser csv = csvFmt.parse(new BufferedReader(new FileReader(path)));

    int ncols = -1;
    Series result = null;
    double[] values = null;

    for (CSVRecord rec : csv.getRecords()) {
        if (result == null) {
            ncols = rec.size() - 1;
            values = new double[ncols];
            result = new Series(ncols);
        }

        for (int ii = 0; ii < ncols; ++ii) {
            values[ii] = Double.parseDouble(rec.get(ii + 1));
        }

        LocalDateTime ldt;
        if (lt != null) {
            ldt = LocalDate.parse(rec.get(0), dtf).atTime(lt);
        } else {
            ldt = LocalDateTime.parse(rec.get(0), dtf);
        }

        result.append(ldt, values);
    }

    if (header) {
        Map<String, Integer> headerMap = csv.getHeaderMap();
        result.clearNames();
        for (Map.Entry<String, Integer> me : headerMap.entrySet()) {
            if (me.getValue() > 0)
                result.setName(me.getKey(), me.getValue() - 1);
        }
    }

    return result;
}
From source file:org.apache.nifi.csv.CSVRecordSource.java
public CSVRecordSource(final InputStream in, final PropertyContext context) throws IOException {
    final String charset = context.getProperty(CSVUtils.CHARSET).getValue();

    final Reader reader;
    try {
        reader = new InputStreamReader(new BOMInputStream(in), charset);
    } catch (UnsupportedEncodingException e) {
        throw new ProcessException(e);
    }

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader().withTrim();
    final CSVParser csvParser = new CSVParser(reader, csvFormat);
    fieldNames = Collections.unmodifiableList(new ArrayList<>(csvParser.getHeaderMap().keySet()));
    csvRecordIterator = csvParser.iterator();
}
From source file:org.apache.nifi.processors.csv.ExtractCSVHeader.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> attrs = new HashMap<>();
    final AtomicInteger headerLength = new AtomicInteger(0);

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            // TODO expose the charset property?
            LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
            if (iterator.hasNext()) {
                lineFound.set(true);
                final String header = iterator.nextLine();

                final String format = context.getProperty(PROP_FORMAT).getValue();
                final String delimiter = context.getProperty(PROP_DELIMITER)
                        .evaluateAttributeExpressions(original).getValue();
                final String prefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                        .evaluateAttributeExpressions(original).getValue();

                attrs.put(prefix + ATTR_HEADER_ORIGINAL, header);

                // TODO validate delimiter in the callback first
                final CSVFormat csvFormat = buildFormat(format, delimiter,
                        true, // we assume first line is the header
                        null); // no custom header
                final CSVParser parser = csvFormat.parse(new StringReader(header));
                final Map<String, Integer> headers = parser.getHeaderMap();
                final int columnCount = headers.size();
                attrs.put(prefix + ATTR_HEADER_COLUMN_COUNT, String.valueOf(columnCount));
                for (Map.Entry<String, Integer> h : headers.entrySet()) {
                    // CSV columns are 1-based in Excel
                    attrs.put(prefix + (h.getValue() + 1), h.getKey());
                }

                // strip the header and send to the 'content' relationship
                if (StringUtils.isNotBlank(header)) {
                    int hLength = header.length();
                    // move past the new line if there are more lines
                    if (original.getSize() > hLength + 1) {
                        hLength++;
                    }
                    headerLength.set(hLength);
                }
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, attrs);
        int offset = headerLength.get();
        if (offset > 0) {
            FlowFile contentOnly = session.clone(ff, offset, original.getSize() - offset);
            session.transfer(contentOnly, REL_CONTENT);
        }
        session.transfer(ff, REL_ORIGINAL);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
From source file:org.apache.nifi.processors.ParseCSV.ParseCSV.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final Charset charset = Charset.defaultCharset();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // TODO implement
    final Map<String, String> attributes = new LinkedHashMap<>();
    final String format = context.getProperty(FORMAT).getValue();
    final boolean create_attributes = Boolean.parseBoolean(context.getProperty(CREATE_ATTRIBUTES).getValue());
    final char delimiter = context.getProperty(DELIMITER).getValue().charAt(0);
    final boolean with_header = Boolean.parseBoolean(context.getProperty(WITH_HEADER).getValue());
    final String output_format = context.getProperty(OUTPUT_FORMAT).getValue();
    final String custom_header = context.getProperty(CUSTOM_HEADER).getValue();
    final String column_mask = context.getProperty(COLUMN_MASK).getValue();
    final String column_encrypt = context.getProperty(COLUMN_ENCRYPT).getValue();
    final String column_tokenize = context.getProperty(COLUMN_TOKENIZE).getValue();
    final String tokenize_unique_identifier = context.getProperty(TOKENIZE_UNQIUE_IDENTIFIER).getValue();
    final String tokenized_ouput = context.getProperty(TOKENIZED_OUTPUT).getValue();
    final String encryptionKey = "Bar12345Bar12345";
    final String static_schema = context.getProperty(STATIC_SCHEMA).getValue();

    // new flowfile here
    final org.apache.nifi.util.ObjectHolder<FlowFile> holder = new org.apache.nifi.util.ObjectHolder<>(null);

    flowFile = session.write(flowFile, new StreamCallback() {
        @Override
        public void process(InputStream inputStream, OutputStream outputStream) throws IOException {
            CSVFormat csvFormat = buildFormat(format, delimiter, with_header, custom_header);
            CSVParser csvParser = new CSVParser(new InputStreamReader(inputStream, charset), csvFormat);
            CSVPrinter csvPrinter = new CSVPrinter(new OutputStreamWriter(outputStream, charset), csvFormat);

            String[] headerArray;
            ArrayList<String> columnMaskList = new ArrayList<>();
            ArrayList<String> columnEncryptList = new ArrayList<>();
            ArrayList<String> columnTokenizeList = new ArrayList<>();
            List<String> maskValueHolder = new LinkedList<>();
            FlowFile tokenized = session.create();

            // print header if needed
            if (custom_header != null && output_format.equals("CSV") && static_schema == null) {
                csvPrinter.printRecord(custom_header);
                headerArray = custom_header.split(",");
            } else if (static_schema != null && custom_header == null) {
                csvPrinter.printRecord(static_schema.replace("\"", ""));
                headerArray = static_schema.split(",");
            } else {
                headerArray = csvParser.getHeaderMap().keySet().toArray(new String[0]);
                csvPrinter.printRecord((Object[]) headerArray);
            }

            if (column_mask != null) {
                columnMaskList = new ArrayList<>(Arrays.asList(column_mask.replace("\"", "").split(",")));
            }
            if (column_encrypt != null) {
                columnEncryptList = new ArrayList<>(Arrays.asList(column_encrypt.split(",")));
            }
            if (column_tokenize != null) {
                columnTokenizeList = new ArrayList<>(Arrays.asList(column_tokenize.split(",")));
            }

            // loop through records and print
            for (final CSVRecord record : csvParser) {
                // generate attributes if required per record
                if (create_attributes) {
                    for (int i = 0; i < headerArray.length; i++) {
                        attributes.put(headerArray[i] + "." + record.getRecordNumber(), record.get(i));
                    }
                }

                // check masked columns
                if (column_mask != null || column_encrypt != null) {
                    // loop through the header array and match user-requested mask columns
                    for (int i = 0; i < headerArray.length; i++) {
                        if (columnMaskList.contains(headerArray[i])) {
                            // set mask
                            maskValueHolder.add(mask(record.get(i)));

                            // construct tokenization row for external DB store
                            if (columnTokenizeList.contains(headerArray[i])) {
                                final String tokenizedRow = tokenizationOut(tokenized_ouput, headerArray[i],
                                        tokenize_unique_identifier, mask(record.get(i)), record.get(i),
                                        Long.toString(record.getRecordNumber()));
                                tokenized = session.append(tokenized, new OutputStreamCallback() {
                                    @Override
                                    public void process(OutputStream outputStream) throws IOException {
                                        outputStream.write(tokenizedRow.getBytes());
                                    }
                                });
                            }
                        } else if (columnEncryptList.contains(headerArray[i])) {
                            // encrypt
                            maskValueHolder.add(new String(Encrypt(record.get(i), encryptionKey), "UTF-8"));
                        } else {
                            // no mask
                            maskValueHolder.add(record.get(i));
                        }
                    }

                    csvPrinter.printRecord(maskValueHolder);
                    // clear mask column holder
                    maskValueHolder.clear();
                } else {
                    // no masking or encryption required, print record
                    switch (output_format) {
                    case "CSV":
                        List<String> items = Arrays.asList(static_schema.split(","));
                        StringBuilder test = new StringBuilder();
                        for (int i = 0; i < items.size(); i++) {
                            test.append(record.get(items.get(i)));
                            if (i < items.size() - 1) {
                                test.append(',');
                            }
                        }
                        // strip leading/trailing quotes (replaceAll, not replace, so the regex applies)
                        csvPrinter.printRecord(test.toString().replaceAll("^\"|\"$", ""));
                        break;
                    case "JSON":
                        String json = new ObjectMapper().writer().withDefaultPrettyPrinter()
                                .writeValueAsString(record.toMap()) + "\n";
                        if (json.length() > 0) {
                            outputStream.write(json.getBytes());
                        }
                        break;
                    case "XML":
                        outputStream.write(new XmlMapper().writeValueAsString(record.toMap()).getBytes());
                        break;
                    }
                }
            }

            csvPrinter.flush();
            csvPrinter.close();
            holder.set(tokenized);
        }
    });

    flowFile = session.putAllAttributes(flowFile, attributes);
    session.transfer(flowFile, RELATIONSHIP_SUCCESS);
    session.transfer(holder.get(), RELATIONSHIP_TOKENIZED);
}
From source file:org.apache.phoenix.util.CSVCommonsLoader.java
private List<ColumnInfo> buildColumnInfoList(CSVParser parser) throws SQLException {
    List<String> columns = this.columns;
    switch (headerSource) {
    case FROM_TABLE:
        System.out.println(String.format("csv columns from database."));
        break;
    case IN_LINE:
        columns = new ArrayList<String>();
        for (String colName : parser.getHeaderMap().keySet()) {
            columns.add(colName); // iterates in column order
        }
        System.out.println(String.format("csv columns from header line. length=%s, %s", columns.size(),
                buildStringFromList(columns)));
        break;
    case SUPPLIED_BY_USER:
        System.out.println(String.format("csv columns from user. length=%s, %s", columns.size(),
                buildStringFromList(columns)));
        break;
    default:
        throw new IllegalStateException("parser has unknown column source.");
    }
    return generateColumnInfo(conn, tableName, columns, isStrict);
}
From source file:org.cast.cwm.service.UserSpreadsheetReader.java
/**
 * Read a spreadsheet of user information and generate potential users.
 * Returns true if all was successful and users could be created as specified.
 *
 * This method does NOT modify the datastore.
 *
 * @param stream the input stream of CSV data
 * @return true if no errors were encountered
 */
@Override
public boolean readInput(InputStream stream) {
    potentialUsers = new ArrayList<PotentialUserSave>();
    potentialSites = new HashMap<String, Site>();
    potentialPeriods = new HashMap<Site, Map<String, Period>>();

    CSVParser parser;
    try {
        parser = CSVFormat.EXCEL.withHeader().withIgnoreEmptyLines().withIgnoreSurroundingSpaces()
                .parse(new InputStreamReader(new BOMInputStream(stream), "UTF-8"));
    } catch (IOException e) {
        log.error(e.getMessage());
        globalError = e.getMessage();
        return false;
    }

    // Make our own secondary mapping of header names to fields, by
    // lowercasing and removing spaces from all header names
    headerMap = parser.getHeaderMap();
    for (String hdr : new HashSet<String>(headerMap.keySet())) {
        String normalized = hdr.toLowerCase().replaceAll("\\s", "");
        if (!normalized.equals(hdr)) {
            headerMap.put(normalized, headerMap.get(hdr));
        }
    }

    globalError = checkRequiredHeaders(headerMap);
    if (!Strings.isEmpty(globalError))
        return false;

    // Read the CSV file, create PotentialUserSave objects, record error messages,
    // and add to the potentialUsers list
    try {
        boolean errors = false; // have errors been encountered?
        for (CSVRecord record : parser) {
            try {
                User user = createUserObject(record);
                String messages = populateUserObject(user, record);
                if (Strings.isEmpty(messages))
                    messages = validateUser(user);

                // Add a PotentialUserSave to the list.
                potentialUsers.add(new PotentialUserSave(modelProvider.modelOf(user), messages, record));
                if (!Strings.isEmpty(messages))
                    errors = true;
            } catch (ArrayIndexOutOfBoundsException e) {
                // This can happen if the last row is missing values;
                // Excel doesn't fill them out to the last column
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Data missing from CSV.\n", record));
                errors = true;
            } catch (Exception e) {
                e.printStackTrace();
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Error: " + e, record));
                errors = true;
            }
        }

        // If the CSV file has only one line, it is either empty or has unrecognized LF/CR values.
        if (parser.getCurrentLineNumber() == 1) {
            potentialUsers.add(
                    new PotentialUserSave(null, "Empty or Corrupted File. Note: Save as Windows CSV.", null));
            globalError = "Empty or Corrupted File - LF/CR values may be invalid!";
            throw new CharacterCodingException();
        }

        return (!errors);
    } catch (CharacterCodingException e) {
        log.error("Empty or Corrupted File - only 1 line found - CR/LF issue? {}", e.getClass());
        return false;
    }
}
From source file:org.nuxeo.ecm.csv.core.CSVImporterWork.java
protected void doImport(CSVParser parser) {
    log.info(String.format("Importing CSV file: %s", getBlob().getFilename()));

    Map<String, Integer> header = parser.getHeaderMap();
    if (header == null) {
        logError(0, "No header line, empty file?", LABEL_CSV_IMPORTER_EMPTY_FILE);
        return;
    }
    if (!header.containsKey(CSV_NAME_COL)) {
        logError(0, "Missing 'name' column", LABEL_CSV_IMPORTER_MISSING_NAME_COLUMN);
        return;
    }
    hasTypeColumn = header.containsKey(CSV_TYPE_COL);

    try {
        int batchSize = options.getBatchSize();
        Iterable<CSVRecord> it = parser;
        if (computeTotal) {
            try {
                List<CSVRecord> l = parser.getRecords();
                total = l.size();
                it = l;
            } catch (IOException e) {
                log.warn("Could not compute total number of documents to be imported");
            }
        }

        for (CSVRecord record : it) {
            if (record.size() == 0) {
                // empty record
                importLogs.add(new CSVImportLog(getLineNumber(record), Status.SKIPPED, "Empty record",
                        LABEL_CSV_IMPORTER_EMPTY_LINE));
                continue;
            }
            try {
                if (importRecord(record, header)) {
                    docsCreatedCount++;
                    getStore().putParameter(id, "status",
                            new CSVImportStatus(CSVImportStatus.State.RUNNING, docsCreatedCount, total));
                    if (docsCreatedCount % batchSize == 0) {
                        commitOrRollbackTransaction();
                        startTransaction();
                    }
                }
            } catch (NuxeoException e) {
                // try next line
                Throwable unwrappedException = unwrapException(e);
                logError(getLineNumber(parser), "Error while importing line: %s",
                        LABEL_CSV_IMPORTER_ERROR_IMPORTING_LINE, unwrappedException.getMessage());
                log.debug(unwrappedException, unwrappedException);
            }
        }

        try {
            session.save();
        } catch (NuxeoException e) {
            Throwable ue = unwrapException(e);
            logError(getLineNumber(parser), "Unable to save: %s", LABEL_CSV_IMPORTER_UNABLE_TO_SAVE,
                    ue.getMessage());
            log.debug(ue, ue);
        }
    } finally {
        commitOrRollbackTransaction();
        startTransaction();
    }

    log.info(String.format("Done importing CSV file: %s", getBlob().getFilename()));
}
From source file:org.nuxeo.ecm.user.center.profile.UserProfileImporter.java
public void doImport(CoreSession session, CSVParser parser, UserProfileService userProfileService)
        throws IOException {
    log.info(String.format("Importing CSV file: %s", dataFileName));

    DocumentType docType = Framework.getLocalService(SchemaManager.class)
            .getDocumentType(UserProfileConstants.USER_PROFILE_DOCTYPE);
    if (docType == null) {
        log.error("The type " + UserProfileConstants.USER_PROFILE_DOCTYPE + " does not exist");
        return;
    }

    Map<String, Integer> header = parser.getHeaderMap();
    if (header == null) {
        // empty file?
        log.error("No header line, empty file?");
        return;
    }

    // find the index for the required name and type values
    Integer nameIndex = header.get(UserProfileImporter.USER_PROFILE_IMPORTER_USERNAME_COL);
    if (nameIndex == null) {
        log.error("Missing 'username' column");
        return;
    }

    long docsUpdatedCount = 0;
    totalRecords = parser.getRecordNumber();
    try {
        int batchSize = config.getBatchSize();
        long lineNumber = 0;

        for (CSVRecord record : parser.getRecords()) {
            lineNumber++;
            currentRecord = lineNumber;
            try {
                if (importLine(record, lineNumber, nameIndex, docType, session, userProfileService, header)) {
                    docsUpdatedCount++;
                    if (docsUpdatedCount % batchSize == 0) {
                        commitOrRollbackTransaction();
                        startTransaction();
                    }
                }
            } catch (NuxeoException e) {
                // try next line
                Throwable unwrappedException = unwrapException(e);
                logImportError(lineNumber, "Error while importing line: %s", unwrappedException.getMessage());
                log.debug(unwrappedException, unwrappedException);
            }
        }

        session.save();
    } finally {
        commitOrRollbackTransaction();
        startTransaction();
    }

    log.info(String.format("Done importing %s entries from CSV file: %s", docsUpdatedCount, dataFileName));
}
From source file:org.openlmis.fulfillment.Resource2Db.java
Pair<List<String>, List<Object[]>> resourceCsvToBatchedPair(final Resource resource) throws IOException {
    XLOGGER.entry(resource.getDescription());

    // parse CSV
    try (InputStreamReader isReader = new InputStreamReader(
            new BOMInputStream(resource.getInputStream(), ByteOrderMark.UTF_8))) {
        CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").parse(isReader);

        // read header row
        MutablePair<List<String>, List<Object[]>> readData = new MutablePair<>();
        readData.setLeft(new ArrayList<>(parser.getHeaderMap().keySet()));
        XLOGGER.info("Read header: " + readData.getLeft());

        // read data rows
        List<Object[]> rows = new ArrayList<>();
        for (CSVRecord record : parser.getRecords()) {
            if (!record.isConsistent()) {
                throw new IllegalArgumentException("CSV record inconsistent: " + record);
            }

            List<String> theRow = IteratorUtils.toList(record.iterator());
            rows.add(theRow.toArray());
        }
        readData.setRight(rows);

        XLOGGER.exit("Records read: " + readData.getRight().size());
        return readData;
    }
}
From source file:permafrost.tundra.data.IDataCSVParser.java
/**
 * Returns an IData representation of the CSV data in the given input stream.
 *
 * @param inputStream The input stream to be decoded.
 * @param charset     The character set to use.
 * @return            An IData representation of the given input stream data.
 * @throws IOException If there is a problem reading from the stream.
 */
@Override
public IData decode(InputStream inputStream, Charset charset) throws IOException {
    if (inputStream == null)
        return null;

    Reader reader = new InputStreamReader(inputStream, CharsetHelper.normalize(charset));
    CSVFormat format = CSVFormat.DEFAULT.withHeader().withDelimiter(delimiter).withNullString("");
    CSVParser parser = format.parse(reader);

    Set<String> keys = parser.getHeaderMap().keySet();
    List<IData> list = new ArrayList<IData>();

    for (CSVRecord record : parser) {
        IData document = IDataFactory.create();
        IDataCursor cursor = document.getCursor();
        for (String key : keys) {
            if (record.isSet(key)) {
                String value = record.get(key);
                if (value != null)
                    IDataUtil.put(cursor, key, value);
            }
        }
        cursor.destroy();
        list.add(document);
    }

    IData output = IDataFactory.create();
    IDataCursor cursor = output.getCursor();
    IDataUtil.put(cursor, "recordWithNoID", list.toArray(new IData[list.size()]));

    return output;
}