List of usage examples for org.apache.commons.csv CSVRecord.toMap()
public Map<String, String> toMap()
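toMap() copies the values of the current record into a new Map<String, String> keyed by the column names, so the parser must have been configured with a header (for example via withFirstRecordAsHeader() or withHeader(...)). Before the project examples below, a minimal self-contained sketch; the file name "people.csv" and its columns are assumptions for illustration:

import java.io.FileReader;
import java.io.Reader;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class ToMapExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical file with a header row such as "name,age,city"
        try (Reader reader = new FileReader("people.csv");
                CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(reader)) {
            for (CSVRecord record : parser) {
                Map<String, String> row = record.toMap(); // header name -> cell value
                System.out.println(row.get("name") + " is " + row.get("age"));
            }
        }
    }
}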
From source file:org.apache.nifi.lookup.CSVRecordLookupService.java
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }
            // try-with-resources so the file reader is closed even on failure
            try (final FileReader reader = new FileReader(csvFile)) {
                final CSVParser records = csvFormat.withFirstRecordAsHeader().parse(reader);
                ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();
                RecordSchema lookupRecordSchema = null;
                for (final CSVRecord record : records) {
                    final String key = record.get(lookupKeyColumn);
                    if (StringUtils.isBlank(key)) {
                        throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                    } else if (!ignoreDuplicates && cache.containsKey(key)) {
                        throw new IllegalStateException(
                                "Duplicate lookup key encountered: " + key + " in " + csvFile);
                    } else if (ignoreDuplicates && cache.containsKey(key)) {
                        logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                    }
                    // Put each key/value pair (except the lookup key itself) into the properties
                    final Map<String, Object> properties = new HashMap<>();
                    record.toMap().forEach((k, v) -> {
                        if (!lookupKeyColumn.equals(k)) {
                            properties.put(k, v);
                        }
                    });
                    // Derive the record schema (all string fields) from the first data row
                    if (lookupRecordSchema == null) {
                        List<RecordField> recordFields = new ArrayList<>(properties.size());
                        properties.forEach((k, v) -> recordFields
                                .add(new RecordField(k, RecordFieldType.STRING.getDataType())));
                        lookupRecordSchema = new SimpleRecordSchema(recordFields);
                    }
                    cache.put(key, new MapRecord(lookupRecordSchema, properties));
                }
                this.cache = cache;
                if (cache.isEmpty()) {
                    logger.warn("Lookup table is empty after reading file: " + csvFile);
                }
            }
        } finally {
            lock.unlock();
        }
    }
}
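The NiFi service above uses toMap() to copy every column except the lookup key into the cached record. The same indexing idea, distilled into a standalone sketch (reusing the imports from the first example; the key column "id" and the file name are assumptions). Because toMap() returns a fresh copy, removing the key from it does not touch the underlying record:

// Build an in-memory index: key column -> remaining columns of that row
Map<String, Map<String, String>> index = new HashMap<>();
try (Reader reader = new FileReader("lookup.csv");
        CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(reader)) {
    for (CSVRecord record : parser) {
        Map<String, String> row = record.toMap();
        String key = row.remove("id"); // safe: toMap() returns a copy
        index.put(key, row);
    }
}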
From source file:org.apache.nifi.processors.csv.CsvToJson.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // Read the entire flow file content into a string
    final ObjectHolder<String> contentHolder = new ObjectHolder<>(null);
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            contentHolder.set(IOUtils.toString(inputStream));
        }
    });

    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(headers);
    try {
        CSVParser csvParser = new CSVParser(new StringReader(contentHolder.get()), csvFileFormat);
        List<CSVRecord> records = csvParser.getRecords();
        if (records.isEmpty()) {
            getLogger().error("No records found");
            session.transfer(flowFile, FAILURE);
        } else if (records.size() > 1) {
            getLogger().error("More than one record found");
            session.transfer(flowFile, FAILURE);
        } else {
            // Exactly one record: emit each column as a JSON string field
            final CSVRecord record = records.get(0);
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream outputStream) throws IOException {
                    try (JsonGenerator generator = jsonFactory.createJsonGenerator(outputStream)) {
                        generator.writeStartObject();
                        Map<String, String> recordMap = record.toMap();
                        for (Map.Entry<String, String> entry : recordMap.entrySet()) {
                            generator.writeStringField(entry.getKey(), entry.getValue());
                        }
                        generator.writeEndObject();
                    }
                }
            });
            session.transfer(flowFile, SUCCESS);
        }
    } catch (IOException e) {
        getLogger().error(e.getMessage(), e);
        session.transfer(flowFile, FAILURE);
    }
}
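Note that this excerpt streams the map field by field through the Jackson 1.x-style jsonFactory.createJsonGenerator(...) (on Jackson 2.x the method is createGenerator(...)). If field-by-field control is not needed, the whole toMap() result can be serialized in one call; a minimal sketch, assuming Jackson 2.x databind is on the classpath:

import com.fasterxml.jackson.databind.ObjectMapper;

// Inside the OutputStreamCallback: serialize the record map directly to JSON
new ObjectMapper().writeValue(outputStream, record.toMap());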
From source file:org.apache.nifi.processors.ParseCSV.ParseCSV.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final Charset charset = Charset.defaultCharset();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final Map<String, String> attributes = new LinkedHashMap<>();
    final String format = context.getProperty(FORMAT).getValue();
    final boolean create_attributes = Boolean.parseBoolean(context.getProperty(CREATE_ATTRIBUTES).getValue());
    final char delimiter = context.getProperty(DELIMITER).getValue().charAt(0);
    final boolean with_header = Boolean.parseBoolean(context.getProperty(WITH_HEADER).getValue());
    final String output_format = context.getProperty(OUTPUT_FORMAT).getValue();
    final String custom_header = context.getProperty(CUSTOM_HEADER).getValue();
    final String column_mask = context.getProperty(COLUMN_MASK).getValue();
    final String column_encrypt = context.getProperty(COLUMN_ENCRYPT).getValue();
    final String column_tokenize = context.getProperty(COLUMN_TOKENIZE).getValue();
    final String tokenize_unique_identifier = context.getProperty(TOKENIZE_UNQIUE_IDENTIFIER).getValue();
    final String tokenized_output = context.getProperty(TOKENIZED_OUTPUT).getValue();
    final String encryptionKey = "Bar12345Bar12345";
    final String static_schema = context.getProperty(STATIC_SCHEMA).getValue();

    // Holder for the tokenized flow file created inside the stream callback
    final org.apache.nifi.util.ObjectHolder<FlowFile> holder = new org.apache.nifi.util.ObjectHolder<>(null);

    flowFile = session.write(flowFile, new StreamCallback() {
        @Override
        public void process(InputStream inputStream, OutputStream outputStream) throws IOException {
            CSVFormat csvFormat = buildFormat(format, delimiter, with_header, custom_header);
            CSVParser csvParser = new CSVParser(new InputStreamReader(inputStream, charset), csvFormat);
            CSVPrinter csvPrinter = new CSVPrinter(new OutputStreamWriter(outputStream, charset), csvFormat);
            String[] headerArray;
            ArrayList<String> columnMaskList = new ArrayList<>();
            ArrayList<String> columnEncryptList = new ArrayList<>();
            ArrayList<String> columnTokenizeList = new ArrayList<>();
            List<String> maskValueHolder = new LinkedList<>();
            FlowFile tokenized = session.create();

            // Print the header if needed
            if (custom_header != null && output_format.equals("CSV") && static_schema == null) {
                csvPrinter.printRecord(custom_header);
                headerArray = custom_header.split(",");
            } else if (static_schema != null && custom_header == null) {
                csvPrinter.printRecord(static_schema.replace("\"", ""));
                headerArray = static_schema.split(",");
            } else {
                headerArray = csvParser.getHeaderMap().keySet().toArray(new String[0]);
                csvPrinter.printRecord(headerArray);
            }

            if (column_mask != null) {
                columnMaskList = new ArrayList<>(Arrays.asList(column_mask.replace("\"", "").split(",")));
            }
            if (column_encrypt != null) {
                columnEncryptList = new ArrayList<>(Arrays.asList(column_encrypt.split(",")));
            }
            if (column_tokenize != null) {
                columnTokenizeList = new ArrayList<>(Arrays.asList(column_tokenize.split(",")));
            }

            // Loop through the records and print them
            for (final CSVRecord record : csvParser) {
                // Generate one attribute per column and record number, if requested
                if (create_attributes) {
                    for (int i = 0; i < headerArray.length; i++) {
                        attributes.put(headerArray[i] + "." + record.getRecordNumber(), record.get(i));
                    }
                }
                if (column_mask != null || column_encrypt != null) {
                    // Match the user-requested mask/encrypt columns against the header array
                    for (int i = 0; i < headerArray.length; i++) {
                        if (columnMaskList.contains(headerArray[i])) {
                            // Mask the value
                            maskValueHolder.add(mask(record.get(i)));
                            // Construct a tokenization row for the external DB store
                            if (columnTokenizeList.contains(headerArray[i])) {
                                final String tokenizedRow = tokenizationOut(tokenized_output, headerArray[i],
                                        tokenize_unique_identifier, mask(record.get(i)), record.get(i),
                                        Long.toString(record.getRecordNumber()));
                                tokenized = session.append(tokenized, new OutputStreamCallback() {
                                    @Override
                                    public void process(OutputStream out) throws IOException {
                                        out.write(tokenizedRow.getBytes());
                                    }
                                });
                            }
                        } else if (columnEncryptList.contains(headerArray[i])) {
                            // Encrypt the value
                            maskValueHolder.add(new String(Encrypt(record.get(i), encryptionKey), "UTF-8"));
                        } else {
                            // Pass the value through unchanged
                            maskValueHolder.add(record.get(i));
                        }
                    }
                    csvPrinter.printRecord(maskValueHolder);
                    maskValueHolder.clear();
                } else {
                    // No masking or encryption required, print the record directly
                    switch (output_format) {
                    case "CSV":
                        // Assumes STATIC_SCHEMA is set when the output format is CSV
                        List<String> items = Arrays.asList(static_schema.split(","));
                        String lastColumn = items.get(items.size() - 1);
                        StringBuilder line = new StringBuilder();
                        for (String item : items) {
                            line.append(record.get(item));
                            if (!item.equals(lastColumn)) {
                                line.append(',');
                            }
                        }
                        csvPrinter.printRecord(line.toString().replaceAll("^\"|\"$", ""));
                        break;
                    case "JSON":
                        String json = new ObjectMapper().writer().withDefaultPrettyPrinter()
                                .writeValueAsString(record.toMap()) + "\n";
                        outputStream.write(json.getBytes());
                        break;
                    case "XML":
                        outputStream.write(new XmlMapper().writeValueAsString(record.toMap()).getBytes());
                        break;
                    }
                }
            }
            csvPrinter.flush();
            csvPrinter.close();
            holder.set(tokenized);
        }
    });

    flowFile = session.putAllAttributes(flowFile, attributes);
    session.transfer(flowFile, RELATIONSHIP_SUCCESS);
    session.transfer(holder.get(), RELATIONSHIP_TOKENIZED);
}
From source file:org.easybatch.extensions.apache.common.csv.ApacheCommonCsvRecordMapper.java
@Override
public GenericRecord processRecord(final ApacheCommonCsvRecord record) throws RecordMappingException {
    CSVRecord csvRecord = record.getPayload();
    return new GenericRecord(record.getHeader(), objectMapper.mapObject(csvRecord.toMap()));
}
From source file:org.easybatch.flatfile.apache.common.csv.ApacheCommonCsvRecordMapper.java
@Override
public T mapRecord(Record record) throws Exception {
    CSVRecord csvRecord = (CSVRecord) record.getPayload();
    return objectMapper.mapObject(csvRecord.toMap());
}
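Both Easy Batch mappers above hand the toMap() result to the framework's generic object mapper, which populates a bean from the header-keyed map. The same map-to-POJO step can be reproduced with Jackson's convertValue; a sketch in which the Person bean and its fields are assumptions for illustration:

import com.fasterxml.jackson.databind.ObjectMapper;

// Hypothetical bean whose property names match the CSV header ("name", "age")
public class Person {
    public String name;
    public int age;
}

// Given a CSVRecord parsed with a header:
ObjectMapper jackson = new ObjectMapper();
Person person = jackson.convertValue(record.toMap(), Person.class);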
From source file:org.phenotips.vocabulary.AbstractCSVAnnotationsExtension.java
/**
 * Processes and caches the row data. By default, it simply copies every mapped value from the row. Override if
 * further processing of the data is needed.
 *
 * @param row the {@link CSVRecord data row} to process
 * @param vocabulary the vocabulary being indexed
 */
protected void processCSVRecordRow(final CSVRecord row, final Vocabulary vocabulary) {
    Map<String, String> csvData = row.toMap();
    MultiValuedMap<String, String> termData = this.data.get(row.get(ID_KEY));
    if (termData == null) {
        termData = new ArrayListValuedHashMap<>();
        this.data.put(row.get(ID_KEY), termData);
    }
    for (Map.Entry<String, String> item : csvData.entrySet()) {
        if (!ID_KEY.equals(item.getKey()) && StringUtils.isNoneBlank(item.getKey(), item.getValue())) {
            termData.put(item.getKey(), item.getValue());
        }
    }
}
From source file:org.phenotips.vocabulary.internal.GeneNomenclature.java
@Override
protected Collection<SolrInputDocument> load(URL url) {
    try {
        Collection<SolrInputDocument> solrDocuments = new HashSet<>();
        Reader in = new InputStreamReader(url.openConnection().getInputStream(), Charset.forName("UTF-8"));
        for (CSVRecord row : CSVFormat.TDF.withHeader().parse(in)) {
            SolrInputDocument crtTerm = new SolrInputDocument();
            for (Map.Entry<String, String> item : row.toMap().entrySet()) {
                if ("hgnc_id".equals(item.getKey())) {
                    crtTerm.addField(ID_FIELD_NAME, item.getValue());
                } else if (StringUtils.isNotBlank(item.getValue())) {
                    crtTerm.addField(item.getKey(), StringUtils.split(item.getValue(), "|"));
                }
            }
            solrDocuments.add(crtTerm);
        }
        addMetaInfo(solrDocuments);
        return solrDocuments;
    } catch (IOException ex) {
        this.logger.warn("Failed to read/parse the HGNC source: {}", ex.getMessage());
    }
    return null;
}
From source file:org.qcert.util.DataLoader.java
/**
 * Process an individual table, producing its rows in JSON form.
 *
 * @param data the CSV file contents as a String
 * @param def the type definition as an ObjectType
 * @param format the CSVFormat to use
 * @return a JsonArray of the translation of the rows
 * @throws Exception
 */
private static JsonArray process(String data, ObjectType def, CSVFormat format) throws Exception {
    JsonArray ans = new JsonArray();
    List<CSVRecord> records = CSVParser.parse(data, format).getRecords();
    for (CSVRecord record : records) {
        Map<String, String> recmap = record.toMap();
        JsonObject datum = new JsonObject();
        for (Entry<String, String> col : recmap.entrySet()) {
            datum.add(col.getKey(), processColumn(col.getKey(), col.getValue(), def));
        }
        ans.add(datum);
    }
    return ans;
}
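When no per-column type conversion is needed, the Gson translation above collapses to a one-liner, since Gson serializes a Map<String, String> directly (every value stays a JSON string, so the processColumn-style typing is skipped); a minimal sketch:

import com.google.gson.Gson;

// Given a CSVRecord parsed with a header, produces e.g. {"name":"Alice","age":"42"}
String json = new Gson().toJson(record.toMap());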
From source file:org.structr.csv.FromCsvFunction.java
@Override
public Object apply(ActionContext ctx, final GraphObject entity, final Object[] sources) {
    if (arrayHasMinLengthAndMaxLengthAndAllElementsNotNull(sources, 1, 4)) {
        try {
            final List<Map<String, String>> objects = new LinkedList<>();
            final String source = sources[0].toString();
            String delimiter = ";";
            String quoteChar = "\"";
            String recordSeparator = "\n";

            // Intentional fall-through: each extra argument also applies the ones below it
            switch (sources.length) {
            case 4:
                recordSeparator = (String) sources[3];
            case 3:
                quoteChar = (String) sources[2];
            case 2:
                delimiter = (String) sources[1];
                break;
            }

            CSVFormat format = CSVFormat.newFormat(delimiter.charAt(0)).withHeader();
            format = format.withQuote(quoteChar.charAt(0));
            format = format.withRecordSeparator(recordSeparator);
            format = format.withIgnoreEmptyLines(true);
            format = format.withIgnoreSurroundingSpaces(true);
            format = format.withSkipHeaderRecord(true);
            format = format.withQuoteMode(QuoteMode.ALL);

            CSVParser parser = new CSVParser(new StringReader(source), format);
            for (final CSVRecord record : parser.getRecords()) {
                objects.add(record.toMap());
            }

            return objects;
        } catch (Throwable t) {
            logException(t, "{0}: Exception for parameter: {1}",
                    new Object[] { getName(), getParametersAsString(sources) });
        }
        return "";
    } else {
        logParameterError(entity, sources, ctx.isJavaScriptContext());
    }
    return usage(ctx.isJavaScriptContext());
}
From source file:org.structr.function.FromCsvFunction.java
@Override
public Object apply(ActionContext ctx, final GraphObject entity, final Object[] sources) {
    if (sources != null && sources.length > 0) {
        if (sources[0] != null) {
            try {
                final List<Map<String, String>> objects = new LinkedList<>();
                final String source = sources[0].toString();
                String delimiter = ";";
                String quoteChar = "\"";
                String recordSeparator = "\n";

                switch (sources.length) {
                case 4:
                    recordSeparator = (String) sources[3];
                case 3:
                    quoteChar = (String) sources[2];
                case 2:
                    delimiter = (String) sources[1];
                    break;
                }

                CSVFormat format = CSVFormat.newFormat(delimiter.charAt(0)).withHeader();
                format = format.withQuote(quoteChar.charAt(0));
                format = format.withRecordSeparator(recordSeparator);
                format = format.withIgnoreEmptyLines(true);
                format = format.withIgnoreSurroundingSpaces(true);
                format = format.withSkipHeaderRecord(true);
                format = format.withQuoteMode(QuoteMode.ALL);

                CSVParser parser = new CSVParser(new StringReader(source), format);
                for (final CSVRecord record : parser.getRecords()) {
                    objects.add(record.toMap());
                }

                return objects;
            } catch (Throwable t) {
                t.printStackTrace();
            }
        }
        return "";
    }
    return usage(ctx.isJavaScriptContext());
}
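Stripped of the structr plumbing, both functions implement the same reusable pattern: parse a CSV string with a header and collect each record's toMap() into a list of maps. A standalone sketch of that core (the class and method names are illustrative):

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvToMaps {
    public static List<Map<String, String>> parse(String source, char delimiter) throws Exception {
        List<Map<String, String>> rows = new ArrayList<>();
        CSVFormat format = CSVFormat.newFormat(delimiter)
                .withQuote('"')
                .withRecordSeparator("\n")
                .withIgnoreEmptyLines(true)
                .withHeader()               // take column names from the first record
                .withSkipHeaderRecord(true);
        try (CSVParser parser = new CSVParser(new StringReader(source), format)) {
            for (CSVRecord record : parser) {
                rows.add(record.toMap());
            }
        }
        return rows;
    }
}

For example, parse("name;age\nAlice;42", ';') returns a single map {name=Alice, age=42}.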