Example usage for org.apache.commons.csv CSVParser getRecords

Introduction

On this page you can find example usage of org.apache.commons.csv CSVParser getRecords.

Prototype

public List<CSVRecord> getRecords() throws IOException 

Document

Parses the CSV input according to the given format and returns the content as a list of CSVRecords.
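
getRecords reads the remainder of the input into memory in one call, so it is best suited to small and mid-sized inputs. A minimal self-contained sketch (the header names and sample data are illustrative):

import java.io.StringReader;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class GetRecordsExample {
    public static void main(String[] args) throws Exception {
        CSVFormat format = CSVFormat.DEFAULT.withHeader("name", "age");
        try (CSVParser parser = new CSVParser(new StringReader("alice,42\nbob,7"), format)) {
            List<CSVRecord> records = parser.getRecords(); // consumes the rest of the input
            for (CSVRecord record : records) {
                System.out.println(record.get("name") + " is " + record.get("age"));
            }
        }
    }
}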

Usage

From source file:org.apache.camel.dataformat.csv.CsvRecordConvertersTest.java

@Before
public void setUp() throws Exception {
    CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    CSVParser parser = new CSVParser(new StringReader("1,2,3"), format);
    List<CSVRecord> records = parser.getRecords();
    record = records.get(0);
}
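
Because the format declares a header, the captured record can then be read by column name; a minimal follow-up sketch, assuming a JUnit test in the same class:

@Test
public void shouldExposeValuesByHeader() {
    assertEquals("1", record.get("A"));
    assertEquals("3", record.get("C"));
}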

From source file:org.apache.nifi.processors.csv.CsvToJson.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ObjectHolder<String> contentHolder = new ObjectHolder<>(null);

    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            contentHolder.set(IOUtils.toString(inputStream));
        }
    });

    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(headers);

    try {
        CSVParser csvParser = new CSVParser(new StringReader(contentHolder.get()), csvFileFormat);

        List<CSVRecord> records = csvParser.getRecords();
        if (records.isEmpty()) {
            getLogger().error("No records found");
            session.transfer(flowFile, FAILURE);
        } else if (records.size() > 1) {
            getLogger().error("More than one record found");
            session.transfer(flowFile, FAILURE);
        } else {
            final CSVRecord record = records.get(0);

            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream outputStream) throws IOException {
                    try (JsonGenerator generator = jsonFactory.createJsonGenerator(outputStream)) {
                        generator.writeStartObject();

                        Map<String, String> recordMap = record.toMap();
                        for (Map.Entry<String, String> entry : recordMap.entrySet()) {
                            generator.writeStringField(entry.getKey(), entry.getValue());
                        }

                        generator.writeEndObject();
                    }
                }
            });

            session.transfer(flowFile, SUCCESS);
        }

    } catch (IOException e) {
        getLogger().error(e.getMessage(), e);
        session.transfer(flowFile, FAILURE);
    }

}
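
For example, with headers {"name", "age"} (illustrative values for the processor's headers field) and flow file content "alice,42", the snippet above would emit the JSON object {"name":"alice","age":"42"}; zero records or more than one record routes the flow file to FAILURE.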

From source file:org.apache.nifi.processors.csv.ParseCSVRecord.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);
            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter, false, // this is a payload, not header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.isEmpty()) {
                // blank line: clear the flag so the flow file is routed to failure below
                lineFound.set(false);
                return;
            }
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix)
                        + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
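
To illustrate the attribute mapping (names here are hypothetical): for a record "a,b" with a schema attribute <schema prefix>1 = id and no value prefix configured, the processor would add delimited.column.id = a and delimited.column.2 = b, falling back to the 1-based column index wherever no schema name is present.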

From source file:org.apache.phoenix.pherf.result.impl.CSVFileResultHandler.java

public synchronized List<Result> read() throws IOException {
    CSVParser parser = null;
    util.ensureBaseResultDirExists();
    try {
        File file = new File(resultFileName);
        parser = CSVParser.parse(file, Charset.defaultCharset(), CSVFormat.DEFAULT);
        List<CSVRecord> records = parser.getRecords();
        List<Result> results = new ArrayList<>();
        String header = null;
        for (CSVRecord record : records) {

            // First record is the CSV Header
            if (record.getRecordNumber() == 1) {
                header = record.toString();
                continue;
            }
            List<ResultValue> resultValues = new ArrayList<>();
            for (String val : record.toString().split(PherfConstants.RESULT_FILE_DELIMETER)) {
                resultValues.add(new ResultValue(val));
            }
            Result result = new Result(resultFileDetails, header, resultValues);
            results.add(result);
        }
        return results;
    } finally {
        if (parser != null) {
            parser.close();
        }
    }
}
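
Since CSVParser implements Closeable, the null-guarded finally block can also be written with try-with-resources; a minimal sketch of the same read, with the Pherf-specific result handling elided:

try (CSVParser parser = CSVParser.parse(new File(resultFileName), Charset.defaultCharset(), CSVFormat.DEFAULT)) {
    List<CSVRecord> records = parser.getRecords();
    // records.get(0) is the header row; the rest are data rows
}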

From source file:org.apache.ranger.unixusersync.process.FileSourceUserGroupBuilder.java

public Map<String, List<String>> readTextFile(File textFile) throws Exception {

    Map<String, List<String>> ret = new HashMap<String, List<String>>();

    String delimiter = config.getUserSyncFileSourceDelimiter();

    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));

    CSVParser csvParser = new CSVParser(new BufferedReader(new FileReader(textFile)), csvFormat);

    List<CSVRecord> csvRecordList = csvParser.getRecords();

    if (csvRecordList != null) {
        for (CSVRecord csvRecord : csvRecordList) {
            List<String> groups = new ArrayList<String>();
            String user = csvRecord.get(0);

            user = user.replaceAll("^\"|\"$", "");

            int i = csvRecord.size();

            for (int j = 1; j < i; j++) {
                String group = csvRecord.get(j);
                if (group != null && !group.isEmpty()) {
                    group = group.replaceAll("^\"|\"$", "");
                    groups.add(group);
                }
            }
            ret.put(user, groups);
        }
    }

    csvParser.close();

    return ret;
}
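
For example, with a comma configured as the delimiter, the input line "jdoe,admins,devs" would yield the map entry jdoe -> [admins, devs]; surrounding double quotes are stripped from the user and from each group.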

From source file:org.apache.storm.sql.runtime.serde.csv.CsvScheme.java

@Override
public List<Object> deserialize(ByteBuffer ser) {
    try {
        String data = new String(Utils.toByteArray(ser), StandardCharsets.UTF_8);
        CSVParser parser = CSVParser.parse(data, CSVFormat.RFC4180);
        CSVRecord record = parser.getRecords().get(0);
        Preconditions.checkArgument(record.size() == fieldNames.size(), "Invalid schema");

        ArrayList<Object> list = new ArrayList<>(fieldNames.size());
        for (int i = 0; i < record.size(); i++) {
            list.add(record.get(i));
        }
        return list;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
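
For instance, with two configured field names, a buffer holding the UTF-8 bytes of "alice,42" would deserialize to the list ["alice", "42"]; a record whose size differs from fieldNames fails the precondition check.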

From source file:org.apache.tika.parser.isatab.ISATabAssayParser.java

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-assay"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();

        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();

        for (int i = 0; i < records.get(0).size(); i++) {
            xhtml.startElement("th");
            xhtml.characters(records.get(0).get(i));
            xhtml.endElement("th");
            xhtml.newline();
        }

        for (int i = 1; i < records.size(); i++) {
            xhtml.startElement("tr");
            xhtml.newline();

            for (int j = 0; j < records.get(i).size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(records.get(i).get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }

            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();

        xhtml.endDocument();

    } finally {
        reader.close();
        if (csvParser != null) {
            csvParser.close();
        }
    }
}

From source file:org.apache.tika.parser.isatab.ISATabStudyParser.java

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-study"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();

        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();

        for (int i = 0; i < records.get(0).size(); i++) {
            xhtml.startElement("th");
            xhtml.characters(records.get(0).get(i));
            xhtml.endElement("th");
            xhtml.newline();
        }

        for (int i = 1; i < records.size(); i++) {
            xhtml.startElement("tr");
            xhtml.newline();

            for (int j = 0; j < records.get(i).size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(records.get(i).get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }

            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();

        xhtml.endDocument();

    } finally {
        reader.close();
        if (csvParser != null) {
            csvParser.close();
        }
    }
}

From source file:org.etudes.mneme.tool.UploadCsv.java

/**
 * Parse the contents into CSV records.
 */
protected void parse() {
    try {
        Reader in = new StringReader(this.contents);
        CSVParser parser = new CSVParser(in, CSVFormat.RFC4180);
        this.records = parser.getRecords();
        parser.close();
    } catch (IOException e) {
        // ignore: leave this.records unset if the contents cannot be parsed
    }
}

From source file:org.failearly.dataz.internal.template.generator.csv.CsvGeneratorImpl.java

@Override
protected void doInit() {
    super.doInit();
    try {
        final CSVFormat format = new CsvFormatResolver(csvProperties()).resolve().getCsvFormat();
        final CSVParser parser = new CSVParser(loadResourceAsReader(getFileAttribute()), format);
        this.records = toCsvRecord(parser.getRecords());
    } catch (IOException ex) {
        throw new DataSetException("Can't load resource", ex);
    }
}
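
The catch block wraps the checked IOException in the library's own DataSetException, so callers of doInit() see a single failure type whether the resource is missing or unreadable.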