Example usage for org.apache.commons.csv CSVParser CSVParser

Introduction

On this page you can find example usages of the org.apache.commons.csv CSVParser constructor.

Prototype

public CSVParser(final Reader reader, final CSVFormat format) throws IOException 

Document

Customized CSV parser using the given CSVFormat.

If you do not read all records from the given reader, you should call #close() on the parser, unless you close the reader.
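
As a quick orientation before the real-world usages below, here is a minimal, self-contained sketch of the constructor; the file name and the SYMBOL column are assumptions for illustration only. Opening the parser in try-with-resources guarantees #close() is called even when not all records are read.

import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvParserSketch {
    public static void main(String[] args) throws IOException {
        // hypothetical input file; the first record is treated as the header
        try (Reader reader = Files.newBufferedReader(Paths.get("stocks.csv"));
                CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withFirstRecordAsHeader())) {
            for (CSVRecord record : parser) {
                System.out.println(record.get("SYMBOL")); // hypothetical column name
            }
        }
    }
}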

Usage

From source file: org.apache.nifi.csv.CSVRecordReader.java

public CSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
        final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat,
        final String timeFormat, final String timestampFormat, final String encoding) throws IOException {

    this.schema = schema;
    final DateFormat df = dateFormat == null ? null : DataTypeUtils.getDateFormat(dateFormat);
    final DateFormat tf = timeFormat == null ? null : DataTypeUtils.getDateFormat(timeFormat);
    final DateFormat tsf = timestampFormat == null ? null : DataTypeUtils.getDateFormat(timestampFormat);

    LAZY_DATE_FORMAT = () -> df;
    LAZY_TIME_FORMAT = () -> tf;
    LAZY_TIMESTAMP_FORMAT = () -> tsf;

    final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding);

    CSVFormat withHeader;
    if (hasHeader) {
        // the data has its own header line, so skip it when reading records
        withHeader = csvFormat.withSkipHeaderRecord();

        if (ignoreHeader) {
            // ignore the names in the data's header line and use the schema's field names instead
            withHeader = withHeader.withHeader(schema.getFieldNames().toArray(new String[0]));
        }
    } else {
        // no header line in the data; take the column names from the schema
        withHeader = csvFormat.withHeader(schema.getFieldNames().toArray(new String[0]));
    }

    csvParser = new CSVParser(reader, withHeader);
}

From source file: org.apache.nifi.csv.CSVRecordSource.java

public CSVRecordSource(final InputStream in, final PropertyContext context) throws IOException {
    final String charset = context.getProperty(CSVUtils.CHARSET).getValue();

    final Reader reader;
    try {
        reader = new InputStreamReader(new BOMInputStream(in), charset);
    } catch (UnsupportedEncodingException e) {
        throw new ProcessException(e);
    }

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader().withTrim();
    final CSVParser csvParser = new CSVParser(reader, csvFormat);
    fieldNames = Collections.unmodifiableList(new ArrayList<>(csvParser.getHeaderMap().keySet()));

    csvRecordIterator = csvParser.iterator();
}

From source file: org.apache.nifi.processors.csv.CsvToJson.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ObjectHolder<String> contentHolder = new ObjectHolder<>(null);

    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            contentHolder.set(IOUtils.toString(inputStream));
        }
    });

    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(headers);

    try {
        CSVParser csvParser = new CSVParser(new StringReader(contentHolder.get()), csvFileFormat);

        List<CSVRecord> records = csvParser.getRecords();
        if (records.size() == 0) {
            getLogger().error("No records found");
            session.transfer(flowFile, FAILURE);
        } else if (records.size() > 1) {
            getLogger().error("More than one record found");
            session.transfer(flowFile, FAILURE);
        } else {
            final CSVRecord record = records.get(0);

            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream outputStream) throws IOException {
                    try (JsonGenerator generator = jsonFactory.createJsonGenerator(outputStream)) {
                        generator.writeStartObject();

                        Map<String, String> recordMap = record.toMap();
                        for (Map.Entry<String, String> entry : recordMap.entrySet()) {
                            generator.writeStringField(entry.getKey(), entry.getValue());
                        }

                        generator.writeEndObject();
                    }
                }
            });

            session.transfer(flowFile, SUCCESS);
        }

    } catch (IOException e) {
        getLogger().error(e.getMessage(), e);
        session.transfer(flowFile, FAILURE);
    }

}

From source file: org.apache.nifi.processors.ParseCSV.ParseCSV.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {

    final Charset charset = Charset.defaultCharset();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // TODO implement
    final Map<String, String> attributes = new LinkedHashMap<>();
    final String format = context.getProperty(FORMAT).getValue();
    final boolean create_attributes = Boolean.parseBoolean(context.getProperty(CREATE_ATTRIBUTES).getValue());
    final char delimiter = context.getProperty(DELIMITER).getValue().charAt(0);
    final boolean with_header = Boolean.parseBoolean(context.getProperty(WITH_HEADER).getValue());
    final String output_format = context.getProperty(OUTPUT_FORMAT).getValue();
    final String custom_header = context.getProperty(CUSTOM_HEADER).getValue();
    final String column_mask = context.getProperty(COLUMN_MASK).getValue();
    final String column_encrypt = context.getProperty(COLUMN_ENCRYPT).getValue();
    final String column_tokenize = context.getProperty(COLUMN_TOKENIZE).getValue();
    final String tokenize_unique_identifier = context.getProperty(TOKENIZE_UNQIUE_IDENTIFIER).getValue();
    final String tokenized_ouput = context.getProperty(TOKENIZED_OUTPUT).getValue();
    final String encryptionKey = "Bar12345Bar12345";
    final String static_schema = context.getProperty(STATIC_SCHEMA).getValue();

    // new flowfile here
    final org.apache.nifi.util.ObjectHolder<FlowFile> holder = new org.apache.nifi.util.ObjectHolder<>(null);

    flowFile = session.write(flowFile, new StreamCallback() {
        @Override
        public void process(InputStream inputStream, OutputStream outputStream) throws IOException {

            CSVFormat csvFormat = buildFormat(format, delimiter, with_header, custom_header);
            CSVParser csvParser = new CSVParser(new InputStreamReader(inputStream, charset), csvFormat);
            CSVPrinter csvPrinter = new CSVPrinter(new OutputStreamWriter(outputStream, charset), csvFormat);
            String[] headerArray;

            ArrayList<String> columnMaskList = new ArrayList<>();
            ArrayList<String> columnEncryptList = new ArrayList<>();
            ArrayList<String> columnTokenizeList = new ArrayList<>();

            List<String> maskValueHolder = new LinkedList<>();
            FlowFile tokenized = session.create();

            // print header if needed
            if (custom_header != null && output_format.equals("CSV") && static_schema == null) {
                csvPrinter.printRecord(custom_header);
                headerArray = custom_header.split(",");
            } else if (static_schema != null && custom_header == null) {
                csvPrinter.printRecord(static_schema.replace("\"", ""));
                headerArray = static_schema.split(",");
            } else {
                headerArray = csvParser.getHeaderMap().keySet().toArray(new String[0]);
                csvPrinter.printRecord(headerArray);
            }

            if (column_mask != null) {
                columnMaskList = new ArrayList<>(Arrays.asList(column_mask.replace("\"", "").split(",")));
            }

            if (column_encrypt != null) {
                columnEncryptList = new ArrayList<>(Arrays.asList(column_encrypt.split(",")));
            }

            if (column_tokenize != null) {
                columnTokenizeList = new ArrayList<>(Arrays.asList(column_tokenize.split(",")));
            }

            // loop through records and print
            for (final CSVRecord record : csvParser) {

                // generate attributes if required per record
                if (create_attributes) {
                    for (int i = 0; i < headerArray.length; i++) {
                        //attributes.put(headerArray[i], record.get(i));
                        attributes.put(headerArray[i] + "." + record.getRecordNumber(), record.get(i));
                    }
                }
                // check masked columns
                if (column_mask != null || column_encrypt != null) {
                    // we have to loop through the header array and match user requested mask columns
                    for (int i = 0; i < headerArray.length; i++) {
                        //System.out.println(headerArray[i] + "." + record.getRecordNumber() + " - " + mask(record.get(i)));

                        if (columnMaskList.contains(headerArray[i])) {
                            // set mask
                            maskValueHolder.add(mask(record.get(i)));

                            // construct tokenization row for external DB store
                            if (columnTokenizeList.contains(headerArray[i])) {
                                final String tokenizedRow;
                                tokenizedRow = tokenizationOut(tokenized_ouput, headerArray[i],
                                        tokenize_unique_identifier, mask(record.get(i)), record.get(i),
                                        Long.toString(record.getRecordNumber()));

                                tokenized = session.append(tokenized, new OutputStreamCallback() {
                                    @Override
                                    public void process(OutputStream outputStream) throws IOException {
                                        outputStream.write(tokenizedRow.getBytes());
                                    }
                                });
                            }
                        } else if (columnEncryptList.contains(headerArray[i])) {
                            // encrypt
                            maskValueHolder.add(new String(Encrypt(record.get(i), encryptionKey), "UTF-8"));
                        } else {
                            // no mask
                            maskValueHolder.add(record.get(i));
                        }
                    }
                    csvPrinter.printRecord(maskValueHolder);
                    // clear mask column holder
                    maskValueHolder.clear();
                } else {
                    // no masking or encryption required, print record
                    switch (output_format) {
                    case "CSV":
                        // build the output row by joining the requested columns in schema order
                        List<String> items = Arrays.asList(static_schema.split(","));
                        StringBuilder row = new StringBuilder();
                        for (int idx = 0; idx < items.size(); idx++) {
                            if (idx > 0) {
                                row.append(',');
                            }
                            row.append(record.get(items.get(idx)));
                        }

                        // replaceAll() applies the regex; String.replace() would treat
                        // "^\"|\"$" as a literal and never strip the surrounding quotes
                        csvPrinter.printRecord(row.toString().replaceAll("^\"|\"$", ""));
                        break;
                    case "JSON":
                        String json = new ObjectMapper().writer().withDefaultPrettyPrinter()
                                .writeValueAsString(record.toMap()) + "\n";
                        if (json.length() > 0) {
                            outputStream.write(json.getBytes());
                        }

                        //List<Map<?, ?>> data = readObjectsFromCsv(inputStream);
                        //String adis = writeAsJson(data);
                        //outputStream.write(writeAsJson(data).getBytes());
                        break;
                    case "XML":
                        outputStream.write(new XmlMapper().writeValueAsString(record.toMap()).getBytes());
                        break;
                    }
                }
            }
            csvPrinter.flush();
            csvPrinter.close();
            holder.set(tokenized);
        }
    });

    flowFile = session.putAllAttributes(flowFile, attributes);
    session.transfer(flowFile, RELATIONSHIP_SUCCESS);
    session.transfer(holder.get(), RELATIONSHIP_TOKENIZED);
}

From source file: org.apache.orc.bench.convert.csv.CsvReader.java

public CsvReader(Path path, TypeDescription schema, Configuration conf, CompressionKind compress)
        throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    InputStream input = compress.read(fs.open(path));
    parser = new CSVParser(new InputStreamReader(input, StandardCharsets.UTF_8), CSVFormat.RFC4180.withHeader())
            .iterator();
    List<TypeDescription> columnTypes = schema.getChildren();
    readers = new ColumnReader[columnTypes.size()];
    int c = 0;
    for (TypeDescription columnType : columnTypes) {
        readers[c++] = createReader(columnType);
    }
}

From source file: org.apache.phoenix.end2end.CSVCommonsLoaderIT.java

@Test
public void testCSVCommonsUpsert() throws Exception {
    CSVParser parser = null;
    PhoenixConnection conn = null;
    try {

        // Create table
        String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE
                + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);";
        conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class);
        PhoenixRuntime.executeStatements(conn, new StringReader(statements), null);

        // Upsert CSV file
        CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Collections.<String>emptyList(),
                true);
        csvUtil.upsert(new StringReader(STOCK_CSV_VALUES_WITH_HEADER));

        // Compare Phoenix ResultSet with CSV file content
        PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE);
        ResultSet phoenixResultSet = statement.executeQuery();
        parser = new CSVParser(new StringReader(STOCK_CSV_VALUES_WITH_HEADER), csvUtil.getFormat());
        for (CSVRecord record : parser) {
            assertTrue(phoenixResultSet.next());
            int i = 0;
            for (String value : record) {
                assertEquals(value, phoenixResultSet.getString(i + 1));
                i++;
            }
        }
        assertFalse(phoenixResultSet.next());
    } finally {
        if (parser != null)
            parser.close();
        if (conn != null)
            conn.close();
    }
}

From source file: org.apache.phoenix.end2end.CSVCommonsLoaderIT.java

@Test
public void testCSVCommonsUpsert_MultiTenant() throws Exception {
    CSVParser parser = null;
    PhoenixConnection globalConn = null;
    PhoenixConnection tenantConn = null;
    try {

        // Create table using the global connection
        String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE_MULTI
                + "(TENANT_ID VARCHAR NOT NULL, SYMBOL VARCHAR NOT NULL, COMPANY VARCHAR,"
                + " CONSTRAINT PK PRIMARY KEY(TENANT_ID,SYMBOL)) MULTI_TENANT = true;";
        globalConn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class);
        PhoenixRuntime.executeStatements(globalConn, new StringReader(statements), null);
        globalConn.close();

        tenantConn = new PhoenixTestDriver().connect(getUrl() + ";TenantId=acme", new Properties())
                .unwrap(PhoenixConnection.class);

        // Upsert CSV file
        CSVCommonsLoader csvUtil = new CSVCommonsLoader(tenantConn, STOCK_TABLE_MULTI,
                Collections.<String>emptyList(), true);
        csvUtil.upsert(new StringReader(STOCK_CSV_VALUES_WITH_HEADER));

        // Compare Phoenix ResultSet with CSV file content
        PreparedStatement statement = tenantConn
                .prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE_MULTI);
        ResultSet phoenixResultSet = statement.executeQuery();
        parser = new CSVParser(new StringReader(STOCK_CSV_VALUES_WITH_HEADER), csvUtil.getFormat());
        for (CSVRecord record : parser) {
            assertTrue(phoenixResultSet.next());
            int i = 0;
            for (String value : record) {
                assertEquals(value, phoenixResultSet.getString(i + 1));
                i++;
            }
        }
        assertFalse(phoenixResultSet.next());
    } finally {
        if (parser != null)
            parser.close();
        if (tenantConn != null)
            tenantConn.close();
    }
}

From source file: org.apache.phoenix.end2end.CSVCommonsLoaderIT.java

@Test
public void testTDVCommonsUpsert() throws Exception {
    CSVParser parser = null;
    PhoenixConnection conn = null;
    try {

        // Create table
        String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE
                + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);";
        conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class);
        PhoenixRuntime.executeStatements(conn, new StringReader(statements), null);

        // Upsert TDV file
        CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Collections.<String>emptyList(),
                true, '\t', '"', null, CSVCommonsLoader.DEFAULT_ARRAY_ELEMENT_SEPARATOR);
        csvUtil.upsert(new StringReader(STOCK_TDV_VALUES_WITH_HEADER));

        // Compare Phoenix ResultSet with CSV file content
        PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE);
        ResultSet phoenixResultSet = statement.executeQuery();
        parser = new CSVParser(new StringReader(STOCK_TDV_VALUES_WITH_HEADER), csvUtil.getFormat());
        for (CSVRecord record : parser) {
            assertTrue(phoenixResultSet.next());
            int i = 0;
            for (String value : record) {
                assertEquals(value, phoenixResultSet.getString(i + 1));
                i++;
            }
        }
        assertFalse(phoenixResultSet.next());
    } finally {
        if (parser != null)
            parser.close();
        if (conn != null)
            conn.close();
    }
}

From source file: org.apache.phoenix.end2end.CSVCommonsLoaderIT.java

@Test
public void testCSVUpsertWithCustomDelimiters() throws Exception {
    CSVParser parser = null;
    PhoenixConnection conn = null;
    try {
        // Create table
        String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE
                + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);";
        conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class);
        PhoenixRuntime.executeStatements(conn, new StringReader(statements), null);

        // Upsert CSV file
        CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Arrays.<String>asList(STOCK_COLUMNS),
                true, '1', '2', '3', CSVCommonsLoader.DEFAULT_ARRAY_ELEMENT_SEPARATOR);
        csvUtil.upsert(new StringReader(STOCK_CSV_VALUES_WITH_DELIMITER));

        // Compare Phoenix ResultSet with CSV file content
        PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE);
        ResultSet phoenixResultSet = statement.executeQuery();
        parser = new CSVParser(new StringReader(STOCK_CSV_VALUES_WITH_DELIMITER), csvUtil.getFormat());
        for (CSVRecord record : parser) {
            assertTrue(phoenixResultSet.next());
            int i = 0;
            for (String value : record) {
                assertEquals(value, phoenixResultSet.getString(i + 1));
                i++;
            }
        }
        assertFalse(phoenixResultSet.next());
    } finally {
        if (parser != null)
            parser.close();
        if (conn != null)
            conn.close();
    }
}

From source file: org.apache.phoenix.end2end.CSVCommonsLoaderIT.java

@Test
public void testCSVUpsertWithColumns() throws Exception {
    CSVParser parser = null;
    PhoenixConnection conn = null;
    try {
        // Create table
        String statements = "CREATE TABLE IF NOT EXISTS " + STOCK_TABLE
                + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);";
        conn = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class);
        PhoenixRuntime.executeStatements(conn, new StringReader(statements), null);

        // Upsert CSV file
        CSVCommonsLoader csvUtil = new CSVCommonsLoader(conn, STOCK_TABLE, Arrays.<String>asList(STOCK_COLUMNS),
                true);
        // no header
        csvUtil.upsert(new StringReader(STOCK_CSV_VALUES));

        // Compare Phoenix ResultSet with CSV file content
        PreparedStatement statement = conn.prepareStatement("SELECT SYMBOL, COMPANY FROM " + STOCK_TABLE);
        ResultSet phoenixResultSet = statement.executeQuery();
        parser = new CSVParser(new StringReader(STOCK_CSV_VALUES), csvUtil.getFormat());
        for (CSVRecord record : parser) {
            assertTrue(phoenixResultSet.next());
            int i = 0;
            for (String value : record) {
                assertEquals(value, phoenixResultSet.getString(i + 1));
                i++;
            }
        }

        assertFalse(phoenixResultSet.next());
    } finally {
        if (parser != null)
            parser.close();
        if (conn != null)
            conn.close();
    }
}