Usage examples for org.apache.commons.csv.CSVParser#getRecords()
public List<CSVRecord> getRecords() throws IOException
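A minimal sketch of typical use, for orientation before the project examples below. getRecords() parses the remaining input and returns all records as an in-memory list, so it suits small inputs; the input string and format here are assumed for illustration and are not taken from any of the projects listed.

import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class GetRecordsExample {
    public static void main(String[] args) throws IOException {
        // CSVParser is Closeable; try-with-resources releases the underlying reader.
        try (CSVParser parser = new CSVParser(new StringReader("a,1\nb,2\n"), CSVFormat.DEFAULT)) {
            List<CSVRecord> records = parser.getRecords();
            for (CSVRecord record : records) {
                System.out.println(record.get(0) + " -> " + record.get(1));
            }
        }
    }
}

For very large inputs, iterating the parser directly (it implements Iterable<CSVRecord>) avoids holding every record in memory at once.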
From source file: org.apache.camel.dataformat.csv.CsvRecordConvertersTest.java

@Before
public void setUp() throws Exception {
    CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    CSVParser parser = new CSVParser(new StringReader("1,2,3"), format);
    List<CSVRecord> records = parser.getRecords();
    record = records.get(0);
}
From source file: org.apache.nifi.processors.csv.CsvToJson.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ObjectHolder<String> contentHolder = new ObjectHolder<>(null);
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            contentHolder.set(IOUtils.toString(inputStream));
        }
    });

    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(headers);
    try {
        CSVParser csvParser = new CSVParser(new StringReader(contentHolder.get()), csvFileFormat);
        List<CSVRecord> records = csvParser.getRecords();
        if (records.size() == 0) {
            getLogger().error("No records found");
            session.transfer(flowFile, FAILURE);
        } else if (records.size() > 1) {
            getLogger().error("More than one record found");
            session.transfer(flowFile, FAILURE);
        } else {
            final CSVRecord record = records.get(0);
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream outputStream) throws IOException {
                    try (JsonGenerator generator = jsonFactory.createJsonGenerator(outputStream)) {
                        generator.writeStartObject();
                        Map<String, String> recordMap = record.toMap();
                        for (Map.Entry<String, String> entry : recordMap.entrySet()) {
                            generator.writeStringField(entry.getKey(), entry.getValue());
                        }
                        generator.writeEndObject();
                    }
                }
            });
            session.transfer(flowFile, SUCCESS);
        }
    } catch (IOException e) {
        getLogger().error(e.getMessage(), e);
        session.transfer(flowFile, FAILURE);
    }
}
From source file: org.apache.nifi.processors.csv.ParseCSVRecord.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);

            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original).getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX).evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX).evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter,
                    false, // this is a payload, not header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix) + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
From source file: org.apache.phoenix.pherf.result.impl.CSVFileResultHandler.java

public synchronized List<Result> read() throws IOException {
    CSVParser parser = null;
    util.ensureBaseResultDirExists();
    try {
        File file = new File(resultFileName);
        parser = CSVParser.parse(file, Charset.defaultCharset(), CSVFormat.DEFAULT);
        List<CSVRecord> records = parser.getRecords();
        List<Result> results = new ArrayList<>();
        String header = null;
        for (CSVRecord record : records) {
            // First record is the CSV Header
            if (record.getRecordNumber() == 1) {
                header = record.toString();
                continue;
            }
            List<ResultValue> resultValues = new ArrayList<>();
            for (String val : record.toString().split(PherfConstants.RESULT_FILE_DELIMETER)) {
                resultValues.add(new ResultValue(val));
            }
            Result result = new Result(resultFileDetails, header, resultValues);
            results.add(result);
        }
        return results;
    } finally {
        parser.close();
    }
}
From source file: org.apache.ranger.unixusersync.process.FileSourceUserGroupBuilder.java

public Map<String, List<String>> readTextFile(File textFile) throws Exception {
    Map<String, List<String>> ret = new HashMap<String, List<String>>();

    String delimiter = config.getUserSyncFileSourceDelimiter();

    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));
    CSVParser csvParser = new CSVParser(new BufferedReader(new FileReader(textFile)), csvFormat);
    List<CSVRecord> csvRecordList = csvParser.getRecords();

    if (csvRecordList != null) {
        for (CSVRecord csvRecord : csvRecordList) {
            List<String> groups = new ArrayList<String>();
            String user = csvRecord.get(0);
            user = user.replaceAll("^\"|\"$", "");
            int i = csvRecord.size();
            for (int j = 1; j < i; j++) {
                String group = csvRecord.get(j);
                if (group != null && !group.isEmpty()) {
                    group = group.replaceAll("^\"|\"$", "");
                    groups.add(group);
                }
            }
            ret.put(user, groups);
        }
    }
    csvParser.close();
    return ret;
}
From source file: org.apache.storm.sql.runtime.serde.csv.CsvScheme.java

@Override
public List<Object> deserialize(ByteBuffer ser) {
    try {
        String data = new String(Utils.toByteArray(ser), StandardCharsets.UTF_8);
        CSVParser parser = CSVParser.parse(data, CSVFormat.RFC4180);
        CSVRecord record = parser.getRecords().get(0);
        Preconditions.checkArgument(record.size() == fieldNames.size(), "Invalid schema");
        ArrayList<Object> list = new ArrayList<>(fieldNames.size());
        for (int i = 0; i < record.size(); i++) {
            list.add(record.get(i));
        }
        return list;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
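As a usage note on the pattern above, which reads only the first record and leaves the parser open: a small sketch (not from the Storm source; the input handling and method name are assumed) that closes the parser with try-with-resources and tolerates empty input.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class SingleRecordParse {
    // Returns the column values of the first CSV record, or an empty list if the input has no records.
    static List<String> firstRecordValues(String data) throws IOException {
        try (CSVParser parser = CSVParser.parse(data, CSVFormat.RFC4180)) {
            List<CSVRecord> records = parser.getRecords();
            if (records.isEmpty()) {
                return new ArrayList<>();
            }
            List<String> values = new ArrayList<>();
            for (String value : records.get(0)) {
                values.add(value);
            }
            return values;
        }
    }
}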
From source file: org.apache.tika.parser.isatab.ISATabAssayParser.java

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-assay"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();
        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();
        for (int i = 0; i < records.get(0).size(); i++) {
            xhtml.startElement("th");
            xhtml.characters(records.get(0).get(i));
            xhtml.endElement("th");
            xhtml.newline();
        }
        for (int i = 1; i < records.size(); i++) {
            xhtml.startElement("tr");
            xhtml.newline();
            for (int j = 0; j < records.get(i).size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(records.get(i).get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }
            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();
        xhtml.endDocument();
    } finally {
        reader.close();
        csvParser.close();
    }
}
From source file: org.apache.tika.parser.isatab.ISATabStudyParser.java

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-study"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();
        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();
        // first record holds the column headers
        for (int i = 0; i < records.get(0).size(); i++) {
            xhtml.startElement("th");
            xhtml.characters(records.get(0).get(i));
            xhtml.endElement("th");
            xhtml.newline();
        }
        // remaining records are the data rows
        for (int i = 1; i < records.size(); i++) {
            xhtml.startElement("tr");
            xhtml.newline();
            for (int j = 0; j < records.get(i).size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(records.get(i).get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }
            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();
        xhtml.endDocument();
    } finally {
        reader.close();
        csvParser.close();
    }
}
From source file: org.etudes.mneme.tool.UploadCsv.java

/**
 * Parse the contents into CSV records.
 */
protected void parse() {
    try {
        Reader in = new StringReader(this.contents);
        CSVParser parser = new CSVParser(in, CSVFormat.RFC4180);
        this.records = parser.getRecords();
        parser.close();
    } catch (IOException e) {
    } finally {
    }
}
From source file: org.failearly.dataz.internal.template.generator.csv.CsvGeneratorImpl.java

@Override
protected void doInit() {
    super.doInit();
    try {
        final CSVFormat format = new CsvFormatResolver(csvProperties()).resolve().getCsvFormat();
        final CSVParser parser = new CSVParser(loadResourceAsReader(getFileAttribute()), format);
        this.records = toCsvRecord(parser.getRecords());
    } catch (IOException ex) {
        throw new DataSetException("Can't load resource", ex);
    }
}