Usage examples for org.apache.commons.csv.CSVParser#getRecords()
public List<CSVRecord> getRecords() throws IOException
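A minimal sketch of typical use, for orientation before the project examples below. getRecords() parses the remaining input and returns all records as an in-memory list, so it suits small inputs; the input string and format here are assumed for illustration and are not taken from any of the projects listed.

import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class GetRecordsExample {
    public static void main(String[] args) throws IOException {
        // CSVParser is Closeable; try-with-resources releases the underlying reader.
        try (CSVParser parser = new CSVParser(new StringReader("a,1\nb,2\n"), CSVFormat.DEFAULT)) {
            List<CSVRecord> records = parser.getRecords();
            for (CSVRecord record : records) {
                System.out.println(record.get(0) + " -> " + record.get(1));
            }
        }
    }
}

For very large inputs, iterating the parser directly (it implements Iterable<CSVRecord>) avoids holding every record in memory at once.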
From source file: org.apache.camel.dataformat.csv.CsvRecordConvertersTest.java

@Before
public void setUp() throws Exception {
    CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    CSVParser parser = new CSVParser(new StringReader("1,2,3"), format);
    List<CSVRecord> records = parser.getRecords();
    record = records.get(0);
}
From source file: org.apache.nifi.processors.csv.CsvToJson.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ObjectHolder<String> contentHolder = new ObjectHolder<>(null);
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            contentHolder.set(IOUtils.toString(inputStream));
        }
    });

    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(headers);
    try {
        CSVParser csvParser = new CSVParser(new StringReader(contentHolder.get()), csvFileFormat);
        List<CSVRecord> records = csvParser.getRecords();
        if (records.size() == 0) {
            getLogger().error("No records found");
            session.transfer(flowFile, FAILURE);
        } else if (records.size() > 1) {
            getLogger().error("More than one record found");
            session.transfer(flowFile, FAILURE);
        } else {
            final CSVRecord record = records.get(0);
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream outputStream) throws IOException {
                    try (JsonGenerator generator = jsonFactory.createJsonGenerator(outputStream)) {
                        generator.writeStartObject();
                        Map<String, String> recordMap = record.toMap();
                        for (Map.Entry<String, String> entry : recordMap.entrySet()) {
                            generator.writeStringField(entry.getKey(), entry.getValue());
                        }
                        generator.writeEndObject();
                    }
                }
            });
            session.transfer(flowFile, SUCCESS);
        }
    } catch (IOException e) {
        getLogger().error(e.getMessage(), e);
        session.transfer(flowFile, FAILURE);
    }
}
From source file: org.apache.nifi.processors.csv.ParseCSVRecord.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);

            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original).getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX).evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX).evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter,
                    false, // this is a payload, not header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix) + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
From source file: org.apache.phoenix.pherf.result.impl.CSVFileResultHandler.java

public synchronized List<Result> read() throws IOException {
    CSVParser parser = null;
    util.ensureBaseResultDirExists();
    try {
        File file = new File(resultFileName);
        parser = CSVParser.parse(file, Charset.defaultCharset(), CSVFormat.DEFAULT);
        List<CSVRecord> records = parser.getRecords();
        List<Result> results = new ArrayList<>();
        String header = null;
        for (CSVRecord record : records) {
            // First record is the CSV Header
            if (record.getRecordNumber() == 1) {
                header = record.toString();
                continue;
            }
            List<ResultValue> resultValues = new ArrayList<>();
            for (String val : record.toString().split(PherfConstants.RESULT_FILE_DELIMETER)) {
                resultValues.add(new ResultValue(val));
            }
            Result result = new Result(resultFileDetails, header, resultValues);
            results.add(result);
        }
        return results;
    } finally {
        parser.close();
    }
}
From source file: org.apache.ranger.unixusersync.process.FileSourceUserGroupBuilder.java

public Map<String, List<String>> readTextFile(File textFile) throws Exception {
    Map<String, List<String>> ret = new HashMap<String, List<String>>();

    String delimiter = config.getUserSyncFileSourceDelimiter();

    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));
    CSVParser csvParser = new CSVParser(new BufferedReader(new FileReader(textFile)), csvFormat);
    List<CSVRecord> csvRecordList = csvParser.getRecords();

    if (csvRecordList != null) {
        for (CSVRecord csvRecord : csvRecordList) {
            List<String> groups = new ArrayList<String>();
            String user = csvRecord.get(0);
            user = user.replaceAll("^\"|\"$", "");
            int i = csvRecord.size();
            for (int j = 1; j < i; j++) {
                String group = csvRecord.get(j);
                if (group != null && !group.isEmpty()) {
                    group = group.replaceAll("^\"|\"$", "");
                    groups.add(group);
                }
            }
            ret.put(user, groups);
        }
    }
    csvParser.close();
    return ret;
}
From source file: org.apache.storm.sql.runtime.serde.csv.CsvScheme.java

@Override
public List<Object> deserialize(ByteBuffer ser) {
    try {
        String data = new String(Utils.toByteArray(ser), StandardCharsets.UTF_8);
        CSVParser parser = CSVParser.parse(data, CSVFormat.RFC4180);
        CSVRecord record = parser.getRecords().get(0);
        Preconditions.checkArgument(record.size() == fieldNames.size(), "Invalid schema");
        ArrayList<Object> list = new ArrayList<>(fieldNames.size());
        for (int i = 0; i < record.size(); i++) {
            list.add(record.get(i));
        }
        return list;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
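As a usage note on the pattern above, which reads only the first record and leaves the parser open: a small sketch (not from the Storm source; the input handling and method name are assumed) that closes the parser with try-with-resources and tolerates empty input.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class SingleRecordParse {
    // Returns the column values of the first CSV record, or an empty list if the input has no records.
    static List<String> firstRecordValues(String data) throws IOException {
        try (CSVParser parser = CSVParser.parse(data, CSVFormat.RFC4180)) {
            List<CSVRecord> records = parser.getRecords();
            if (records.isEmpty()) {
                return new ArrayList<>();
            }
            List<String> values = new ArrayList<>();
            for (String value : records.get(0)) {
                values.add(value);
            }
            return values;
        }
    }
}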
From source file: org.apache.tika.parser.isatab.ISATabAssayParser.java

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-assay"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();
        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();
        for (int i = 0; i < records.get(0).size(); i++) {
            xhtml.startElement("th");
            xhtml.characters(records.get(0).get(i));
            xhtml.endElement("th");
            xhtml.newline();
        }
        for (int i = 1; i < records.size(); i++) {
            xhtml.startElement("tr");
            xhtml.newline();
            for (int j = 0; j < records.get(i).size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(records.get(i).get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }
            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();
        xhtml.endDocument();
    } finally {
        reader.close();
        csvParser.close();
    }
}
From source file: org.apache.tika.parser.isatab.ISATabStudyParser.java

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-study"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();
        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();
        // first record holds the column headers
        for (int i = 0; i < records.get(0).size(); i++) {
            xhtml.startElement("th");
            xhtml.characters(records.get(0).get(i));
            xhtml.endElement("th");
            xhtml.newline();
        }
        // remaining records are the data rows
        for (int i = 1; i < records.size(); i++) {
            xhtml.startElement("tr");
            xhtml.newline();
            for (int j = 0; j < records.get(i).size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(records.get(i).get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }
            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();
        xhtml.endDocument();
    } finally {
        reader.close();
        csvParser.close();
    }
}
From source file: org.etudes.mneme.tool.UploadCsv.java

/**
 * Parse the contents into CSV records.
 */
protected void parse() {
    try {
        Reader in = new StringReader(this.contents);
        CSVParser parser = new CSVParser(in, CSVFormat.RFC4180);
        this.records = parser.getRecords();
        parser.close();
    } catch (IOException e) {
    } finally {
    }
}
From source file: org.failearly.dataz.internal.template.generator.csv.CsvGeneratorImpl.java

@Override
protected void doInit() {
    super.doInit();
    try {
        final CSVFormat format = new CsvFormatResolver(csvProperties()).resolve().getCsvFormat();
        final CSVParser parser = new CSVParser(loadResourceAsReader(getFileAttribute()), format);
        this.records = toCsvRecord(parser.getRecords());
    } catch (IOException ex) {
        throw new DataSetException("Can't load resource", ex);
    }
}