List of usage examples for org.apache.commons.csv CSVRecord get
public String get(final String name)
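Before the project examples, a minimal standalone sketch of CSVRecord.get: it shows lookup both by header name (the signature above) and by zero-based index, which several of the examples below also use. It assumes Commons CSV 1.4+ on the classpath; "data.csv" and the "name" column are placeholders, and imports are omitted as in the snippets that follow.

Reader in = new FileReader("data.csv"); // placeholder input file
CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in);
for (CSVRecord record : parser) {
    String byName = record.get("name"); // column lookup by header name
    String byIndex = record.get(0);     // column lookup by zero-based index
    System.out.println(byName + " / " + byIndex);
}
parser.close();
in.close();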
From source file:org.andresoft.datasource.FileReformatTest.java
@Test
public void testReformatChicagoFoodInspectionCsv() throws IOException {
    // Input columns: Inspection ID | DBA Name | AKA Name | License # | Facility Type | Risk | Address | City |
    // State | Zip | Inspection Date | Inspection Type | Results | Violations | Latitude | Longitude | Location
    System.setProperty("line.separator", "\n");
    Reader in = new FileReader("/Development/andresoft/hadoop_data/Food_Inspections_chicago.csv");
    File file = new File("/Development/andresoft/hadoop_data/Food_Inspections_chicago_nomalized-2.csv");
    FileWriter fw = new FileWriter(file);
    final CSVPrinter printer = CSVFormat.DEFAULT
            .withHeader("Inspection ID", "DBA Name", "AKA Name", "License #", "Facility Type", "Risk",
                    "Address", "City", "State", "Zip", "Inspection Date", "Inspection Type", "Results",
                    "Violation Number", "Violation", "Comments", "Latitude", "Longitude", "Location")
            .print(fw);
    final CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader());
    // Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
    for (CSVRecord record : parser) {
        String inspectionId = record.get("Inspection ID");
        String dbaName = record.get("DBA Name");
        String akaName = record.get("AKA Name");
        String licenseNum = record.get("License #");
        String facility = record.get("Facility Type");
        String risk = record.get("Risk");
        String address = record.get("Address");
        String city = record.get("City");
        String state = record.get("State");
        String zip = record.get("Zip");
        String inspectionDate = record.get("Inspection Date");
        String inspectionType = record.get("Inspection Type");
        String results = record.get("Results");
        String violations = record.get("Violations");
        String latitude = record.get("Latitude");
        String longitude = record.get("Longitude");
        String location = record.get("Location");

        // Each record packs multiple violations into one field, separated by '|';
        // split them so every violation becomes its own output record.
        String[] violationsArray = violations.split("\\|");
        for (String v : violationsArray) {
            String comments = "None";
            String violation = "None";
            String[] violationWithComment = v.split("Comments:");
            if (violationWithComment.length == 2) {
                violation = violationWithComment[0];
                comments = violationWithComment[1];
            } else {
                violation = violationWithComment[0];
            }
            if (!StringUtils.isBlank(violation)) {
                int violationNumberEndIndex = violation.indexOf('.');
                int violationNumber = Integer.valueOf(violation.substring(0, violationNumberEndIndex).trim());
                printer.printRecord(inspectionId, dbaName, akaName, licenseNum, facility, risk, address, city,
                        state, zip, inspectionDate, inspectionType, results, violationNumber,
                        violation.substring(violationNumberEndIndex + 1), comments, latitude, longitude,
                        location);
            }
        }
    }
    printer.close();
    in.close();
}
From source file:org.apache.ambari.view.hive.resources.uploads.parsers.csv.CSVIterator.java
@Override
public Row next() {
    CSVRecord row = iterator.next();
    Object[] values = new Object[row.size()];
    for (int i = 0; i < values.length; i++) {
        values[i] = row.get(i);
    }
    Row r = new Row(values);
    return r;
}
From source file:org.apache.batchee.csv.CommonsCsvReaderTest.java
@Test
public void read() throws Exception {
    final String path = "target/work/CommonsCsvReaderTest.txt";
    final Properties jobParams = new Properties();
    jobParams.setProperty("input", path);

    final JobOperator jobOperator = BatchRuntime.getJobOperator();
    IOs.write(path, "v11,v12\nv21,v22\nv31,v32");
    Batches.waitForEnd(jobOperator, jobOperator.start("csv-reader", jobParams));

    final int size = StoreItems.ITEMS.size();
    assertEquals(3, size);
    for (int i = 1; i <= size; i++) {
        final CSVRecord record = StoreItems.ITEMS.get(i - 1);
        assertEquals("v" + i + "1", record.get(0));
        assertEquals("v" + i + "2", record.get(1));
    }
}
From source file:org.apache.batchee.csv.mapper.DefaultMapper.java
@Override
public T fromRecord(final CSVRecord record) {
    try {
        final T instance = type.newInstance();
        for (final Map.Entry<Integer, Field> f : fieldByPosition.entrySet()) {
            final String obj = record.get(f.getKey());
            final Field field = f.getValue();
            setField(instance, obj, field);
        }
        for (final Map.Entry<String, Field> f : fieldByName.entrySet()) {
            final String obj = record.get(f.getKey());
            final Field field = f.getValue();
            setField(instance, obj, field);
        }
        return instance;
    } catch (final InstantiationException e) {
        throw new IllegalStateException(e);
    } catch (final IllegalAccessException e) {
        throw new IllegalStateException(e);
    }
}
From source file:org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java
public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line,
        BeamRecordSqlType beamRecordSqlType) {
    List<Object> fieldsValue = new ArrayList<>(beamRecordSqlType.getFieldCount());
    try (StringReader reader = new StringReader(line)) {
        CSVParser parser = csvFormat.parse(reader);
        CSVRecord rawRecord = parser.getRecords().get(0);

        if (rawRecord.size() != beamRecordSqlType.getFieldCount()) {
            throw new IllegalArgumentException(String.format("Expect %d fields, but actually %d",
                    beamRecordSqlType.getFieldCount(), rawRecord.size()));
        } else {
            for (int idx = 0; idx < beamRecordSqlType.getFieldCount(); idx++) {
                String raw = rawRecord.get(idx);
                fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
            }
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}
From source file:org.apache.nifi.csv.CSVRecordReader.java
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields)
        throws IOException, MalformedRecordException {
    final RecordSchema schema = getSchema();

    final List<String> rawFieldNames = getRawFieldNames();
    final int numFieldNames = rawFieldNames.size();

    for (final CSVRecord csvRecord : csvParser) {
        final Map<String, Object> values = new LinkedHashMap<>();
        for (int i = 0; i < csvRecord.size(); i++) {
            final String rawFieldName = numFieldNames <= i ? "unknown_field_index_" + i : rawFieldNames.get(i);
            final String rawValue = csvRecord.get(i);

            final Optional<DataType> dataTypeOption = schema.getDataType(rawFieldName);
            if (!dataTypeOption.isPresent() && dropUnknownFields) {
                continue;
            }

            final Object value;
            if (coerceTypes && dataTypeOption.isPresent()) {
                value = convert(rawValue, dataTypeOption.get(), rawFieldName);
            } else if (dataTypeOption.isPresent()) {
                // The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
                // dictate a field type. As a result, we will use the schema that we have to attempt to convert
                // the value into the desired type if it's a simple type.
                value = convertSimpleIfPossible(rawValue, dataTypeOption.get(), rawFieldName);
            } else {
                value = rawValue;
            }

            values.put(rawFieldName, value);
        }

        return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
    }

    return null;
}
From source file:org.apache.nifi.csv.CSVSchemaInference.java
private void inferSchema(final CSVRecordAndFieldNames recordAndFieldNames,
        final Map<String, FieldTypeInference> typeMap) {
    final CSVRecord csvRecord = recordAndFieldNames.getRecord();
    for (final String fieldName : recordAndFieldNames.getFieldNames()) {
        final String value = csvRecord.get(fieldName);
        if (value == null) {
            return;
        }

        final FieldTypeInference typeInference = typeMap.computeIfAbsent(fieldName,
                key -> new FieldTypeInference());
        final String trimmed = trim(value);
        final DataType dataType = getDataType(trimmed);
        typeInference.addPossibleDataType(dataType);
    }
}
From source file:org.apache.nifi.lookup.CSVRecordLookupService.java
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final FileReader reader = new FileReader(csvFile);
            final CSVParser records = csvFormat.withFirstRecordAsHeader().parse(reader);
            ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();
            RecordSchema lookupRecordSchema = null;
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);

                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && cache.containsKey(key)) {
                    throw new IllegalStateException(
                            "Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && cache.containsKey(key)) {
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }

                // Put each key/value pair (except the lookup) into the properties
                final Map<String, Object> properties = new HashMap<>();
                record.toMap().forEach((k, v) -> {
                    if (!lookupKeyColumn.equals(k)) {
                        properties.put(k, v);
                    }
                });

                if (lookupRecordSchema == null) {
                    List<RecordField> recordFields = new ArrayList<>(properties.size());
                    properties.forEach((k, v) -> recordFields
                            .add(new RecordField(k, RecordFieldType.STRING.getDataType())));
                    lookupRecordSchema = new SimpleRecordSchema(recordFields);
                }

                cache.put(key, new MapRecord(lookupRecordSchema, properties));
            }

            this.cache = cache;

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
From source file:org.apache.nifi.lookup.SimpleCsvFileLookupService.java
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final Map<String, String> properties = new HashMap<>();
            final FileReader reader = new FileReader(csvFile);
            final Iterable<CSVRecord> records = csvFormat.withFirstRecordAsHeader().parse(reader);
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);
                final String value = record.get(lookupValueColumn);
                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && properties.containsKey(key)) {
                    throw new IllegalStateException(
                            "Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && properties.containsKey(key)) {
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }
                properties.put(key, value);
            }

            this.cache = new ConcurrentHashMap<>(properties);

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
From source file:org.apache.nifi.processors.csv.ParseCSVRecord.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);
            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter,
                    false, // this is a payload, not header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }

                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }

                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix)
                        + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}