List of usage examples for org.apache.commons.csv CSVRecord get
public String get(final String name)
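Before the project examples, a minimal standalone sketch of CSVRecord.get: it shows lookup both by header name (the signature above) and by zero-based index, which several of the examples below also use. It assumes Commons CSV 1.4+ on the classpath; "data.csv" and the "name" column are placeholders, and imports are omitted as in the snippets that follow.

Reader in = new FileReader("data.csv"); // placeholder input file
CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in);
for (CSVRecord record : parser) {
    String byName = record.get("name"); // column lookup by header name
    String byIndex = record.get(0);     // column lookup by zero-based index
    System.out.println(byName + " / " + byIndex);
}
parser.close();
in.close();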
From source file:org.andresoft.datasource.FileReformatTest.java
@Test
public void testReformatChicagoFoodInspectionCsv() throws IOException {
    // Input columns: Inspection ID | DBA Name | AKA Name | License # | Facility Type | Risk | Address | City |
    // State | Zip | Inspection Date | Inspection Type | Results | Violations | Latitude | Longitude | Location
    System.setProperty("line.separator", "\n");
    Reader in = new FileReader("/Development/andresoft/hadoop_data/Food_Inspections_chicago.csv");
    File file = new File("/Development/andresoft/hadoop_data/Food_Inspections_chicago_nomalized-2.csv");
    FileWriter fw = new FileWriter(file);
    final CSVPrinter printer = CSVFormat.DEFAULT
            .withHeader("Inspection ID", "DBA Name", "AKA Name", "License #", "Facility Type", "Risk",
                    "Address", "City", "State", "Zip", "Inspection Date", "Inspection Type", "Results",
                    "Violation Number", "Violation", "Comments", "Latitude", "Longitude", "Location")
            .print(fw);
    final CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader());
    // Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
    for (CSVRecord record : parser) {
        String inspectionId = record.get("Inspection ID");
        String dbaName = record.get("DBA Name");
        String akaName = record.get("AKA Name");
        String licenseNum = record.get("License #");
        String facility = record.get("Facility Type");
        String risk = record.get("Risk");
        String address = record.get("Address");
        String city = record.get("City");
        String state = record.get("State");
        String zip = record.get("Zip");
        String inspectionDate = record.get("Inspection Date");
        String inspectionType = record.get("Inspection Type");
        String results = record.get("Results");
        String violations = record.get("Violations");
        String latitude = record.get("Latitude");
        String longitude = record.get("Longitude");
        String location = record.get("Location");

        // Each record packs multiple violations into one field, separated by '|';
        // split them so every violation becomes its own output record.
        String[] violationsArray = violations.split("\\|");
        for (String v : violationsArray) {
            String comments = "None";
            String violation = "None";
            String[] violationWithComment = v.split("Comments:");
            if (violationWithComment.length == 2) {
                violation = violationWithComment[0];
                comments = violationWithComment[1];
            } else {
                violation = violationWithComment[0];
            }
            if (!StringUtils.isBlank(violation)) {
                int violationNumberEndIndex = violation.indexOf('.');
                int violationNumber = Integer.valueOf(violation.substring(0, violationNumberEndIndex).trim());
                printer.printRecord(inspectionId, dbaName, akaName, licenseNum, facility, risk, address, city,
                        state, zip, inspectionDate, inspectionType, results, violationNumber,
                        violation.substring(violationNumberEndIndex + 1), comments, latitude, longitude,
                        location);
            }
        }
    }
    printer.close();
    in.close();
}
From source file:org.apache.ambari.view.hive.resources.uploads.parsers.csv.CSVIterator.java
@Override
public Row next() {
    CSVRecord row = iterator.next();
    Object[] values = new Object[row.size()];
    for (int i = 0; i < values.length; i++) {
        values[i] = row.get(i);
    }
    Row r = new Row(values);
    return r;
}
From source file:org.apache.batchee.csv.CommonsCsvReaderTest.java
@Test
public void read() throws Exception {
    final String path = "target/work/CommonsCsvReaderTest.txt";
    final Properties jobParams = new Properties();
    jobParams.setProperty("input", path);

    final JobOperator jobOperator = BatchRuntime.getJobOperator();
    IOs.write(path, "v11,v12\nv21,v22\nv31,v32");
    Batches.waitForEnd(jobOperator, jobOperator.start("csv-reader", jobParams));

    final int size = StoreItems.ITEMS.size();
    assertEquals(3, size);
    for (int i = 1; i <= size; i++) {
        final CSVRecord record = StoreItems.ITEMS.get(i - 1);
        assertEquals("v" + i + "1", record.get(0));
        assertEquals("v" + i + "2", record.get(1));
    }
}
From source file:org.apache.batchee.csv.mapper.DefaultMapper.java
@Override
public T fromRecord(final CSVRecord record) {
    try {
        final T instance = type.newInstance();
        for (final Map.Entry<Integer, Field> f : fieldByPosition.entrySet()) {
            final String obj = record.get(f.getKey());
            final Field field = f.getValue();
            setField(instance, obj, field);
        }
        for (final Map.Entry<String, Field> f : fieldByName.entrySet()) {
            final String obj = record.get(f.getKey());
            final Field field = f.getValue();
            setField(instance, obj, field);
        }
        return instance;
    } catch (final InstantiationException e) {
        throw new IllegalStateException(e);
    } catch (final IllegalAccessException e) {
        throw new IllegalStateException(e);
    }
}
From source file:org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java
public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line,
        BeamRecordSqlType beamRecordSqlType) {
    List<Object> fieldsValue = new ArrayList<>(beamRecordSqlType.getFieldCount());
    try (StringReader reader = new StringReader(line)) {
        CSVParser parser = csvFormat.parse(reader);
        CSVRecord rawRecord = parser.getRecords().get(0);

        if (rawRecord.size() != beamRecordSqlType.getFieldCount()) {
            throw new IllegalArgumentException(String.format("Expect %d fields, but actually %d",
                    beamRecordSqlType.getFieldCount(), rawRecord.size()));
        } else {
            for (int idx = 0; idx < beamRecordSqlType.getFieldCount(); idx++) {
                String raw = rawRecord.get(idx);
                fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
            }
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}
From source file:org.apache.nifi.csv.CSVRecordReader.java
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields)
        throws IOException, MalformedRecordException {
    final RecordSchema schema = getSchema();

    final List<String> rawFieldNames = getRawFieldNames();
    final int numFieldNames = rawFieldNames.size();

    for (final CSVRecord csvRecord : csvParser) {
        final Map<String, Object> values = new LinkedHashMap<>();
        for (int i = 0; i < csvRecord.size(); i++) {
            final String rawFieldName = numFieldNames <= i ? "unknown_field_index_" + i : rawFieldNames.get(i);
            final String rawValue = csvRecord.get(i);

            final Optional<DataType> dataTypeOption = schema.getDataType(rawFieldName);
            if (!dataTypeOption.isPresent() && dropUnknownFields) {
                continue;
            }

            final Object value;
            if (coerceTypes && dataTypeOption.isPresent()) {
                value = convert(rawValue, dataTypeOption.get(), rawFieldName);
            } else if (dataTypeOption.isPresent()) {
                // The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
                // dictate a field type. As a result, we will use the schema that we have to attempt to convert
                // the value into the desired type if it's a simple type.
                value = convertSimpleIfPossible(rawValue, dataTypeOption.get(), rawFieldName);
            } else {
                value = rawValue;
            }

            values.put(rawFieldName, value);
        }

        return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
    }

    return null;
}
From source file:org.apache.nifi.csv.CSVSchemaInference.java
private void inferSchema(final CSVRecordAndFieldNames recordAndFieldNames,
        final Map<String, FieldTypeInference> typeMap) {
    final CSVRecord csvRecord = recordAndFieldNames.getRecord();
    for (final String fieldName : recordAndFieldNames.getFieldNames()) {
        final String value = csvRecord.get(fieldName);
        if (value == null) {
            return;
        }

        final FieldTypeInference typeInference = typeMap.computeIfAbsent(fieldName,
                key -> new FieldTypeInference());
        final String trimmed = trim(value);
        final DataType dataType = getDataType(trimmed);
        typeInference.addPossibleDataType(dataType);
    }
}
From source file:org.apache.nifi.lookup.CSVRecordLookupService.java
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final FileReader reader = new FileReader(csvFile);
            final CSVParser records = csvFormat.withFirstRecordAsHeader().parse(reader);
            ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();
            RecordSchema lookupRecordSchema = null;
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);

                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && cache.containsKey(key)) {
                    throw new IllegalStateException(
                            "Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && cache.containsKey(key)) {
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }

                // Put each key/value pair (except the lookup) into the properties
                final Map<String, Object> properties = new HashMap<>();
                record.toMap().forEach((k, v) -> {
                    if (!lookupKeyColumn.equals(k)) {
                        properties.put(k, v);
                    }
                });

                if (lookupRecordSchema == null) {
                    List<RecordField> recordFields = new ArrayList<>(properties.size());
                    properties.forEach((k, v) -> recordFields
                            .add(new RecordField(k, RecordFieldType.STRING.getDataType())));
                    lookupRecordSchema = new SimpleRecordSchema(recordFields);
                }

                cache.put(key, new MapRecord(lookupRecordSchema, properties));
            }

            this.cache = cache;

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
From source file:org.apache.nifi.lookup.SimpleCsvFileLookupService.java
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final Map<String, String> properties = new HashMap<>();
            final FileReader reader = new FileReader(csvFile);
            final Iterable<CSVRecord> records = csvFormat.withFirstRecordAsHeader().parse(reader);
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);
                final String value = record.get(lookupValueColumn);
                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && properties.containsKey(key)) {
                    throw new IllegalStateException(
                            "Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && properties.containsKey(key)) {
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }
                properties.put(key, value);
            }

            this.cache = new ConcurrentHashMap<>(properties);

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
From source file:org.apache.nifi.processors.csv.ParseCSVRecord.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);
            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter,
                    false, // this is a payload, not header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }

                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }

                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix)
                        + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}