Usage examples for the org.apache.commons.csv.CSVParser constructor
public CSVParser(final Reader reader, final CSVFormat format) throws IOException
If you do not read all records from the given reader, you should call close() on the parser, unless you close the reader yourself.
From source file:org.apache.nifi.csv.CSVRecordReader.java
public CSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema, final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat, final String timeFormat, final String timestampFormat, final String encoding) throws IOException { this.schema = schema; final DateFormat df = dateFormat == null ? null : DataTypeUtils.getDateFormat(dateFormat); final DateFormat tf = timeFormat == null ? null : DataTypeUtils.getDateFormat(timeFormat); final DateFormat tsf = timestampFormat == null ? null : DataTypeUtils.getDateFormat(timestampFormat); LAZY_DATE_FORMAT = () -> df;//from w ww . j av a2s .c om LAZY_TIME_FORMAT = () -> tf; LAZY_TIMESTAMP_FORMAT = () -> tsf; final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding); CSVFormat withHeader; if (hasHeader) { withHeader = csvFormat.withSkipHeaderRecord(); if (ignoreHeader) { withHeader = withHeader.withHeader(schema.getFieldNames().toArray(new String[0])); } } else { withHeader = csvFormat.withHeader(schema.getFieldNames().toArray(new String[0])); } csvParser = new CSVParser(reader, withHeader); }
From source file:org.apache.nifi.csv.CSVRecordSource.java
/**
 * Builds a record source over CSV data read from {@code in}.
 *
 * <p>The character set name is taken from the {@code CSVUtils.CHARSET} property. The CSV format
 * comes from {@code CSVUtils.createCSVFormat(context)}, adjusted to treat the first record as
 * the header and to trim values. The header names are captured as an unmodifiable list and the
 * parser's iterator is kept for reading records.
 *
 * @param in      stream containing the CSV data
 * @param context property context supplying the charset and CSV format settings
 * @throws IOException      if the CSV parser cannot be created
 * @throws ProcessException if the configured charset is not supported
 */
public CSVRecordSource(final InputStream in, final PropertyContext context) throws IOException {
    final String charsetName = context.getProperty(CSVUtils.CHARSET).getValue();

    final Reader decodedInput;
    try {
        final InputStream bomAwareInput = new BOMInputStream(in);
        decodedInput = new InputStreamReader(bomAwareInput, charsetName);
    } catch (UnsupportedEncodingException unsupported) {
        throw new ProcessException(unsupported);
    }

    final CSVFormat headerAwareFormat = CSVUtils.createCSVFormat(context)
            .withFirstRecordAsHeader()
            .withTrim();
    final CSVParser parser = new CSVParser(decodedInput, headerAwareFormat);

    final List<String> headerNames = new ArrayList<>(parser.getHeaderMap().keySet());
    fieldNames = Collections.unmodifiableList(headerNames);
    csvRecordIterator = parser.iterator();
}
From source file:org.apache.nifi.processors.csv.CsvToJson.java
/**
 * Converts a flow file holding exactly one CSV record into a single JSON object.
 *
 * <p>The flow file content is read into memory and parsed with {@code CSVFormat.DEFAULT} using
 * the configured {@code headers}. The flow file is routed to {@code FAILURE} when parsing fails
 * or when the content holds zero or more than one record. Otherwise the record is written as a
 * JSON object of header-name/value string fields and the flow file is routed to {@code SUCCESS}.
 *
 * @param context the process context (not used by this method)
 * @param session the session used to read, rewrite and transfer the flow file
 * @throws ProcessException as declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ObjectHolder<String> contentHolder = new ObjectHolder<>(null);
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            // NOTE(review): no charset is given, so the platform default is used to decode
            // the content - confirm whether an explicit charset should be configured.
            contentHolder.set(IOUtils.toString(inputStream));
        }
    });

    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(headers);

    // Parse inside try-with-resources so the parser is always closed. The scope is limited to
    // parsing so that a failure here can never follow an earlier transfer of the flow file.
    final List<CSVRecord> records;
    try (CSVParser csvParser = new CSVParser(new StringReader(contentHolder.get()), csvFileFormat)) {
        records = csvParser.getRecords();
    } catch (IOException e) {
        getLogger().error(e.getMessage(), e);
        session.transfer(flowFile, FAILURE);
        return;
    }

    if (records.isEmpty()) {
        getLogger().error("No records found");
        session.transfer(flowFile, FAILURE);
        return;
    }
    if (records.size() > 1) {
        getLogger().error("More than one record found");
        session.transfer(flowFile, FAILURE);
        return;
    }

    final CSVRecord record = records.get(0);
    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(OutputStream outputStream) throws IOException {
            try (JsonGenerator generator = jsonFactory.createJsonGenerator(outputStream)) {
                generator.writeStartObject();
                Map<String, String> recordMap = record.toMap();
                for (Map.Entry<String, String> entry : recordMap.entrySet()) {
                    generator.writeStringField(entry.getKey(), entry.getValue());
                }
                generator.writeEndObject();
            }
        }
    });
    session.transfer(flowFile, SUCCESS);
}
From source file:org.apache.nifi.processors.ParseCSV.ParseCSV.java
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final Charset charset = Charset.defaultCharset(); FlowFile flowFile = session.get();/*from w w w. j ava 2 s . c om*/ if (flowFile == null) { return; } // TODO implement final Map<String, String> attributes = new LinkedHashMap<>(); final String format = context.getProperty(FORMAT).getValue(); final boolean create_attributes = Boolean.parseBoolean(context.getProperty(CREATE_ATTRIBUTES).getValue()); final char delimiter = context.getProperty(DELIMITER).getValue().charAt(0); final boolean with_header = Boolean.parseBoolean(context.getProperty(WITH_HEADER).getValue()); final String output_format = context.getProperty(OUTPUT_FORMAT).getValue(); final String custom_header = context.getProperty(CUSTOM_HEADER).getValue(); final String column_mask = context.getProperty(COLUMN_MASK).getValue(); final String column_encrypt = context.getProperty(COLUMN_ENCRYPT).getValue(); final String column_tokenize = context.getProperty(COLUMN_TOKENIZE).getValue(); final String tokenize_unique_identifier = context.getProperty(TOKENIZE_UNQIUE_IDENTIFIER).getValue(); final String tokenized_ouput = context.getProperty(TOKENIZED_OUTPUT).getValue(); final String encryptionKey = "Bar12345Bar12345"; final String static_schema = context.getProperty(STATIC_SCHEMA).getValue(); // new flowfile here final org.apache.nifi.util.ObjectHolder<FlowFile> holder = new org.apache.nifi.util.ObjectHolder<>(null); flowFile = session.write(flowFile, new StreamCallback() { @Override public void process(InputStream inputStream, OutputStream outputStream) throws IOException { CSVFormat csvFormat = buildFormat(format, delimiter, with_header, custom_header); CSVParser csvParser = new CSVParser(new InputStreamReader(inputStream, charset), csvFormat); CSVPrinter csvPrinter = new CSVPrinter(new OutputStreamWriter(outputStream, charset), csvFormat); String headerArray[]; ArrayList<String> columnMaskList = 
new ArrayList<>(); ArrayList<String> columnEncryptList = new ArrayList<String>(); ArrayList<String> columnTokenizeList = new ArrayList<String>(); List<String> maskValueHolder = new LinkedList<>(); FlowFile tokenized = session.create(); // print header if needed if (custom_header != null && output_format.equals("CSV") && static_schema == null) { csvPrinter.printRecord(custom_header); headerArray = custom_header.split(","); } else if (static_schema != null && custom_header == null) { csvPrinter.printRecord(static_schema.replace("\"", "")); headerArray = static_schema.split(","); } else { headerArray = csvParser.getHeaderMap().keySet().toArray(new String[0]); csvPrinter.printRecord(headerArray); } if (column_mask != null) { columnMaskList = new ArrayList<>(Arrays.asList(column_mask.replace("\"", "").split(","))); } if (column_encrypt != null) { columnEncryptList = new ArrayList<>(Arrays.asList(column_encrypt.split(","))); } if (column_tokenize != null) { columnTokenizeList = new ArrayList<>(Arrays.asList(column_tokenize.split(","))); } // loop through records and print for (final CSVRecord record : csvParser) { Map<String, String> k = record.toMap(); for (Map.Entry<String, String> konj : k.entrySet()) { //System.out.println(konj.getValue()); } // generate attributes if required per record if (create_attributes) { for (int i = 0; i < headerArray.length; i++) { //attributes.put(headerArray[i], record.get(i)); attributes.put(headerArray[i] + "." + record.getRecordNumber(), record.get(i)); } } // check masked columns if (column_mask != null || column_encrypt != null) { // we have to loop through the header array and match user requested mask columns for (int i = 0; i < headerArray.length; i++) { //System.out.println(headerArray[i] + "." 
+ record.getRecordNumber() + " - " + mask(record.get(i))); if (columnMaskList.contains(headerArray[i])) { // set mask maskValueHolder.add(mask(record.get(i))); // construct tokenization row for external DB store if (columnTokenizeList.contains(headerArray[i])) { final String tokenizedRow; tokenizedRow = tokenizationOut(tokenized_ouput, headerArray[i], tokenize_unique_identifier, mask(record.get(i)), record.get(i), Long.toString(record.getRecordNumber())); tokenized = session.append(tokenized, new OutputStreamCallback() { @Override public void process(OutputStream outputStream) throws IOException { outputStream.write(tokenizedRow.getBytes()); } }); } } else if (columnEncryptList.contains(headerArray[i])) { // encrypt maskValueHolder.add(new String(Encrypt(record.get(i), encryptionKey), "UTF-8")); } else { // no mask maskValueHolder.add(record.get(i)); } } csvPrinter.printRecord(maskValueHolder); // clear mask column holder maskValueHolder.clear(); } else { // no masking or encryption required, print record switch (output_format) { case "CSV": //csvPrinter.printRecord(record); List<String> items = Arrays.asList(static_schema.split(",")); String lastColumn = items.get(items.size() - 1); String test = ""; for (String item : items) { if (item != lastColumn) { test += record.get(item) + ","; } else { test += record.get(item); } } csvPrinter.printRecord(test.replace("^\"|\"$", "")); break; case "JSON": String json = new ObjectMapper().writer().withDefaultPrettyPrinter() .writeValueAsString(record.toMap()) + "\n"; if (json.length() > 0) { outputStream.write(json.getBytes()); } //List<Map<?, ?>> data = readObjectsFromCsv(inputStream); //String adis = writeAsJson(data); //outputStream.write(writeAsJson(data).getBytes()); break; case "XML": outputStream.write(new XmlMapper().writeValueAsString(record.toMap()).getBytes()); break; } } } csvPrinter.flush(); csvPrinter.close(); holder.set(tokenized); } }); flowFile = session.putAllAttributes(flowFile, attributes); 
session.transfer(flowFile, RELATIONSHIP_SUCCESS); session.transfer(holder.get(), RELATIONSHIP_TOKENIZED); }
From source file:org.apache.orc.bench.convert.csv.CsvReader.java
/**
 * Opens a CSV file and prepares one column reader per child of the schema.
 *
 * <p>The file is opened through the path's file system, passed through
 * {@code compress.read(...)}, decoded as UTF-8 and parsed as RFC 4180 CSV with the header taken
 * from the first record. If anything fails after the stream has been opened, the stream is
 * closed before the exception propagates.
 *
 * @param path     location of the CSV file
 * @param schema   schema whose children determine the column readers
 * @param conf     configuration used to resolve the file system
 * @param compress compression kind used to wrap the raw file stream
 * @throws IOException if the file cannot be opened or the CSV parser cannot be created
 */
public CsvReader(Path path, TypeDescription schema, Configuration conf, CompressionKind compress)
        throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    InputStream input = compress.read(fs.open(path));
    try {
        parser = new CSVParser(new InputStreamReader(input, StandardCharsets.UTF_8),
                CSVFormat.RFC4180.withHeader()).iterator();

        List<TypeDescription> columnTypes = schema.getChildren();
        readers = new ColumnReader[columnTypes.size()];
        int c = 0;
        for (TypeDescription columnType : columnTypes) {
            readers[c++] = createReader(columnType);
        }
    } catch (IOException | RuntimeException e) {
        // Nothing else owns the stream yet, so release it before failing.
        try {
            input.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
From source file:org.apache.phoenix.end2end.CSVCommonsLoaderIT.java
@Test public void testCSVCommonsUpsert() throws Exception { CSVParser parser = null;/* w ww .jav a2 s.c o m*/ PhoenixConnection conn = null; try { // Create table String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);"; conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class); PhoenixRuntime.executeStatements(conn, new StringReader(statements), null); // Upsert CSV file CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Collections.<String>emptyList(), true); csvUtil.upsert(new StringReader(STOCK_CSV_VALUES_WITH_HEADER)); // Compare Phoenix ResultSet with CSV file content PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE); ResultSet phoenixResultSet = statement.executeQuery(); parser = new CSVParser(new StringReader(STOCK_CSV_VALUES_WITH_HEADER), csvUtil.getFormat()); for (CSVRecord record : parser) { assertTrue(phoenixResultSet.next()); int i = 0; for (String value : record) { assertEquals(value, phoenixResultSet.getString(i + 1)); i++; } } assertFalse(phoenixResultSet.next()); } finally { if (parser != null) parser.close(); if (conn != null) conn.close(); } }
From source file:org.apache.phoenix.end2end.CSVCommonsLoaderIT.java
@Test public void testCSVCommonsUpsert_MultiTenant() throws Exception { CSVParser parser = null;//w w w. j a v a 2 s . co m PhoenixConnection globalConn = null; PhoenixConnection tenantConn = null; try { // Create table using the global connection String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE_MULTI + "(TENANT_ID VARCHAR NOT NULL, SYMBOL VARCHAR NOT NULL, COMPANY VARCHAR," + " CONSTRAINT PK PRIMARY KEY(TENANT_ID,SYMBOL)) MULTI_TENANT = true;"; globalConn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class); PhoenixRuntime.executeStatements(globalConn, new StringReader(statements), null); globalConn.close(); tenantConn = new PhoenixTestDriver().connect(getUrl() + ";TenantId=acme", new Properties()) .unwrap(PhoenixConnection.class); // Upsert CSV file CSVCommonsLoader csvUtil = new CSVCommonsLoader(tenantConn, STOCK_TABLE_MULTI, Collections.<String>emptyList(), true); csvUtil.upsert(new StringReader(STOCK_CSV_VALUES_WITH_HEADER)); // Compare Phoenix ResultSet with CSV file content PreparedStatement statement = tenantConn .prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE_MULTI); ResultSet phoenixResultSet = statement.executeQuery(); parser = new CSVParser(new StringReader(STOCK_CSV_VALUES_WITH_HEADER), csvUtil.getFormat()); for (CSVRecord record : parser) { assertTrue(phoenixResultSet.next()); int i = 0; for (String value : record) { assertEquals(value, phoenixResultSet.getString(i + 1)); i++; } } assertFalse(phoenixResultSet.next()); } finally { if (parser != null) parser.close(); if (tenantConn != null) tenantConn.close(); } }
From source file:org.apache.phoenix.end2end.CSVCommonsLoaderIT.java
@Test public void testTDVCommonsUpsert() throws Exception { CSVParser parser = null;//from w w w . j av a 2s. c o m PhoenixConnection conn = null; try { // Create table String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);"; conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class); PhoenixRuntime.executeStatements(conn, new StringReader(statements), null); // Upsert TDV file CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Collections.<String>emptyList(), true, '\t', '"', null, CSVCommonsLoader.DEFAULT_ARRAY_ELEMENT_SEPARATOR); csvUtil.upsert(new StringReader(STOCK_TDV_VALUES_WITH_HEADER)); // Compare Phoenix ResultSet with CSV file content PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE); ResultSet phoenixResultSet = statement.executeQuery(); parser = new CSVParser(new StringReader(STOCK_TDV_VALUES_WITH_HEADER), csvUtil.getFormat()); for (CSVRecord record : parser) { assertTrue(phoenixResultSet.next()); int i = 0; for (String value : record) { assertEquals(value, phoenixResultSet.getString(i + 1)); i++; } } assertFalse(phoenixResultSet.next()); } finally { if (parser != null) parser.close(); if (conn != null) conn.close(); } }
From source file:org.apache.phoenix.end2end.CSVCommonsLoaderIT.java
@Test public void testCSVUpsertWithCustomDelimiters() throws Exception { CSVParser parser = null;// ww w .j a v a 2s . c o m PhoenixConnection conn = null; try { // Create table String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);"; conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class); PhoenixRuntime.executeStatements(conn, new StringReader(statements), null); // Upsert CSV file CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Arrays.<String>asList(STOCK_COLUMNS), true, '1', '2', '3', CSVCommonsLoader.DEFAULT_ARRAY_ELEMENT_SEPARATOR); csvUtil.upsert(new StringReader(STOCK_CSV_VALUES_WITH_DELIMITER)); // Compare Phoenix ResultSet with CSV file content PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE); ResultSet phoenixResultSet = statement.executeQuery(); parser = new CSVParser(new StringReader(STOCK_CSV_VALUES_WITH_DELIMITER), csvUtil.getFormat()); for (CSVRecord record : parser) { assertTrue(phoenixResultSet.next()); int i = 0; for (String value : record) { assertEquals(value, phoenixResultSet.getString(i + 1)); i++; } } assertFalse(phoenixResultSet.next()); } finally { if (parser != null) parser.close(); if (conn != null) conn.close(); } }
From source file:org.apache.phoenix.end2end.CSVCommonsLoaderIT.java
@Test public void testCSVUpsertWithColumns() throws Exception { CSVParser parser = null;/*from w w w. j a v a2s. com*/ PhoenixConnection conn = null; try { // Create table String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);"; conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class); PhoenixRuntime.executeStatements(conn, new StringReader(statements), null); // Upsert CSV file CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Arrays.<String>asList(STOCK_COLUMNS), true); // no header csvUtil.upsert(new StringReader(STOCK_CSV_VALUES)); // Compare Phoenix ResultSet with CSV file content PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE); ResultSet phoenixResultSet = statement.executeQuery(); parser = new CSVParser(new StringReader(STOCK_CSV_VALUES), csvUtil.getFormat()); for (CSVRecord record : parser) { assertTrue(phoenixResultSet.next()); int i = 0; for (String value : record) { assertEquals(value, phoenixResultSet.getString(i + 1)); i++; } } assertFalse(phoenixResultSet.next()); } finally { if (parser != null) parser.close(); if (conn != null) conn.close(); } }