Example usage for org.apache.commons.csv CSVRecord get

List of usage examples for org.apache.commons.csv CSVRecord get

Introduction

This page collects example usages of org.apache.commons.csv CSVRecord.get, taken from the source files listed below.

Prototype

public String get(final String name) 

Document

Returns a value by name.
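
Before the project examples below, here is a minimal, self-contained sketch of get in use. The CSV content and the column names ("name", "age") are invented for illustration; when the parser is given a header (here read from the first line via withHeader()), values can be fetched by column name, while the index-based overload get(int), used by several of the examples below, fetches them by position.

import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvRecordGetSketch {
    public static void main(String[] args) throws Exception {
        // Invented sample data; the first line is treated as the header row.
        String csv = "name,age\nalice,30\nbob,25";
        try (CSVParser parser = new CSVParser(new StringReader(csv), CSVFormat.DEFAULT.withHeader())) {
            for (CSVRecord record : parser) {
                String name = record.get("name"); // lookup by header name
                String age = record.get(1);       // lookup by zero-based column index
                System.out.println(name + " is " + age);
            }
        }
    }
}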

Usage

From source file: org.andresoft.datasource.FileReformatTest.java

@Test
public void testReformatChicagoFoodInpectionCsv() throws IOException {
    // Inspection ID | DBA Name | AKA Name| License # | Facility Type| Risk| Address| City|
    // State| Zip| Inspection Date| Inspection Type| Results| Violations| Latitude| Longitude|
    // Location
    System.setProperty("line.separator", "\n");
    Reader in = new FileReader("/Development/andresoft/hadoop_data/Food_Inspections_chicago.csv");
    File file = new File("/Development/andresoft/hadoop_data/Food_Inspections_chicago_nomalized-2.csv");
    FileWriter fw = new FileWriter(file);

    final CSVPrinter printer = CSVFormat.DEFAULT
            .withHeader("Inspection ID", "DBA Name", "AKA Name", "License #", "Facility Type", "Risk",
                    "Address", "City", "State", "Zip", "Inspection Date", "Inspection Type", "Results",
                    "Violation Number", "Violation", "Comments", "Latitude", "Longitude", "Loacation")
            .print(fw);

    final CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader());

    // Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
    for (CSVRecord record : parser) {
        String inspectionId = record.get("Inspection ID");
        String dbaName = record.get("DBA Name");
        String akaName = record.get("AKA Name");
        String licenseNum = record.get("License #");
        String facility = record.get("Facility Type");
        String risk = record.get("Risk");
        String address = record.get("Address");
        String city = record.get("City");
        String state = record.get("State");
        String zip = record.get("Zip");
        String inspectionDate = record.get("Inspection Date");
        String inspectionType = record.get("Inspection Type");
        String results = record.get("Results");
        String violations = record.get("Violations");
        String latitude = record.get("Latitude");
        String longitude = record.get("Longitude");
        String location = record.get("Location");

        String[] violationsArray = violations.split("\\|");
        for (String v : violationsArray) {
            String comments = "None";
            String violation = "None";
            String[] violationWihComment = v.split("Comments:");
            if (violationWihComment.length == 2) {
                violation = violationWihComment[0];
                comments = violationWihComment[1];
            } else {
                violation = violationWihComment[0];
            }
            if (!StringUtils.isBlank(violation)) {
                int violationNumberEndIndex = violation.indexOf('.');
                int violationNumber = Integer.parseInt(violation.substring(0, violationNumberEndIndex).trim());

                printer.printRecord(inspectionId, dbaName, akaName, licenseNum, facility, risk, address, city,
                        state, zip, inspectionDate, inspectionType, results, violationNumber,
                        violation.substring(violationNumberEndIndex + 1), comments, latitude, longitude,
                        location);
            }
        }

    }
    printer.close();
    in.close();
}

From source file: org.apache.ambari.view.hive.resources.uploads.parsers.csv.CSVIterator.java

@Override
public Row next() {
    CSVRecord row = iterator.next();
    Object[] values = new Object[row.size()];
    for (int i = 0; i < values.length; i++) {
        values[i] = row.get(i);
    }
    Row r = new Row(values);
    return r;
}

From source file: org.apache.batchee.csv.CommonsCsvReaderTest.java

@Test
public void read() throws Exception {
    final String path = "target/work/CommonsCsvReaderTest.txt";

    final Properties jobParams = new Properties();
    jobParams.setProperty("input", path);

    final JobOperator jobOperator = BatchRuntime.getJobOperator();
    IOs.write(path, "v11,v12\nv21,v22\nv31,v32");
    Batches.waitForEnd(jobOperator, jobOperator.start("csv-reader", jobParams));

    final int size = StoreItems.ITEMS.size();
    assertEquals(size, 3);
    for (int i = 1; i <= size; i++) {
        final CSVRecord record = StoreItems.ITEMS.get(i - 1);
        assertEquals("v" + i + "1", record.get(0));
        assertEquals("v" + i + "2", record.get(1));
    }
}

From source file: org.apache.batchee.csv.mapper.DefaultMapper.java

@Override
public T fromRecord(final CSVRecord record) {
    try {
        final T instance = type.newInstance();
        for (final Map.Entry<Integer, Field> f : fieldByPosition.entrySet()) {
            final String obj = record.get(f.getKey());
            final Field field = f.getValue();
            setField(instance, obj, field);
        }
        for (final Map.Entry<String, Field> f : fieldByName.entrySet()) {
            final String obj = record.get(f.getKey());
            final Field field = f.getValue();
            setField(instance, obj, field);
        }
        return instance;
    } catch (final InstantiationException e) {
        throw new IllegalStateException(e);
    } catch (IllegalAccessException e) {
        throw new IllegalStateException(e);
    }
}

From source file: org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java

public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line,
        BeamRecordSqlType beamRecordSqlType) {
    List<Object> fieldsValue = new ArrayList<>(beamRecordSqlType.getFieldCount());
    try (StringReader reader = new StringReader(line)) {
        CSVParser parser = csvFormat.parse(reader);
        CSVRecord rawRecord = parser.getRecords().get(0);

        if (rawRecord.size() != beamRecordSqlType.getFieldCount()) {
            throw new IllegalArgumentException(String.format("Expect %d fields, but actually %d",
                    beamRecordSqlType.getFieldCount(), rawRecord.size()));
        } else {
            for (int idx = 0; idx < beamRecordSqlType.getFieldCount(); idx++) {
                String raw = rawRecord.get(idx);
                fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
            }
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}

From source file: org.apache.nifi.csv.CSVRecordReader.java

@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields)
        throws IOException, MalformedRecordException {
    final RecordSchema schema = getSchema();

    final List<String> rawFieldNames = getRawFieldNames();
    final int numFieldNames = rawFieldNames.size();

    for (final CSVRecord csvRecord : csvParser) {
        final Map<String, Object> values = new LinkedHashMap<>();
        for (int i = 0; i < csvRecord.size(); i++) {
            final String rawFieldName = numFieldNames <= i ? "unknown_field_index_" + i : rawFieldNames.get(i);
            final String rawValue = csvRecord.get(i);

            final Optional<DataType> dataTypeOption = schema.getDataType(rawFieldName);

            if (!dataTypeOption.isPresent() && dropUnknownFields) {
                continue;
            }

            final Object value;
            if (coerceTypes && dataTypeOption.isPresent()) {
                value = convert(rawValue, dataTypeOption.get(), rawFieldName);
            } else if (dataTypeOption.isPresent()) {
                // The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
                // dictate a field type. As a result, we will use the schema that we have to attempt to convert
                // the value into the desired type if it's a simple type.
                value = convertSimpleIfPossible(rawValue, dataTypeOption.get(), rawFieldName);
            } else {
                value = rawValue;
            }

            values.put(rawFieldName, value);
        }

        return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
    }

    return null;
}

From source file: org.apache.nifi.csv.CSVSchemaInference.java

private void inferSchema(final CSVRecordAndFieldNames recordAndFieldNames,
        final Map<String, FieldTypeInference> typeMap) {
    final CSVRecord csvRecord = recordAndFieldNames.getRecord();
    for (final String fieldName : recordAndFieldNames.getFieldNames()) {
        final String value = csvRecord.get(fieldName);
        if (value == null) {
            return;
        }

        final FieldTypeInference typeInference = typeMap.computeIfAbsent(fieldName,
                key -> new FieldTypeInference());
        final String trimmed = trim(value);
        final DataType dataType = getDataType(trimmed);
        typeInference.addPossibleDataType(dataType);
    }
}

From source file: org.apache.nifi.lookup.CSVRecordLookupService.java

private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final FileReader reader = new FileReader(csvFile);
            final CSVParser records = csvFormat.withFirstRecordAsHeader().parse(reader);
            ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();
            RecordSchema lookupRecordSchema = null;
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);

                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && cache.containsKey(key)) {
                    throw new IllegalStateException(
                            "Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && cache.containsKey(key)) {
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }

                // Put each key/value pair (except the lookup) into the properties
                final Map<String, Object> properties = new HashMap<>();
                record.toMap().forEach((k, v) -> {
                    if (!lookupKeyColumn.equals(k)) {
                        properties.put(k, v);
                    }
                });

                if (lookupRecordSchema == null) {
                    List<RecordField> recordFields = new ArrayList<>(properties.size());
                    properties.forEach((k, v) -> recordFields
                            .add(new RecordField(k, RecordFieldType.STRING.getDataType())));
                    lookupRecordSchema = new SimpleRecordSchema(recordFields);
                }

                cache.put(key, new MapRecord(lookupRecordSchema, properties));
            }

            this.cache = cache;

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}

From source file: org.apache.nifi.lookup.SimpleCsvFileLookupService.java

private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final Map<String, String> properties = new HashMap<>();
            final FileReader reader = new FileReader(csvFile);
            final Iterable<CSVRecord> records = csvFormat.withFirstRecordAsHeader().parse(reader);
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);
                final String value = record.get(lookupValueColumn);
                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && properties.containsKey(key)) {
                    throw new IllegalStateException(
                            "Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && properties.containsKey(key)) {
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }
                properties.put(key, value);
            }

            this.cache = new ConcurrentHashMap<>(properties);

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}

From source file: org.apache.nifi.processors.csv.ParseCSVRecord.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);
            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter, false, // this is a payload, not header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix)
                        + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}