List of usage examples for org.apache.commons.csv.CSVParser#getHeaderMap()
public Map<String, Integer> getHeaderMap()
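getHeaderMap() returns a copy of the header map, keyed by column name with zero-based column indices as values, iterating in column order; it returns null when the format was not configured with a header. A minimal, self-contained sketch of the call (the file name data.csv and its columns are illustrative, not taken from the examples below):

import java.io.FileReader;
import java.io.Reader;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;

public class GetHeaderMapExample {
    public static void main(String[] args) throws Exception {
        // withHeader() with no arguments tells the parser to take the header
        // from the first record of the input.
        try (Reader reader = new FileReader("data.csv");
                CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(reader)) {
            // Keys are column names, values are zero-based column indices;
            // the returned map is a copy that iterates in column order.
            Map<String, Integer> headerMap = parser.getHeaderMap();
            for (Map.Entry<String, Integer> entry : headerMap.entrySet()) {
                System.out.println(entry.getValue() + " -> " + entry.getKey());
            }
        }
    }
}

The examples below show the same call in real projects, typically to recover column names for schema building, attribute generation, or header validation.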
From source file:net.tradelib.core.Series.java
static public Series fromCsv(String path, boolean header, DateTimeFormatter dtf, LocalTime lt)
        throws Exception {
    if (dtf == null) {
        if (lt == null)
            dtf = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
        else
            dtf = DateTimeFormatter.ISO_DATE;
    }

    // Parse and import the csv
    CSVFormat csvFmt = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreSurroundingSpaces();
    if (header)
        csvFmt = csvFmt.withHeader();
    CSVParser csv = csvFmt.parse(new BufferedReader(new FileReader(path)));

    int ncols = -1;
    Series result = null;
    double[] values = null;

    for (CSVRecord rec : csv.getRecords()) {
        if (result == null) {
            ncols = rec.size() - 1;
            values = new double[ncols];
            result = new Series(ncols);
        }

        for (int ii = 0; ii < ncols; ++ii) {
            values[ii] = Double.parseDouble(rec.get(ii + 1));
        }

        LocalDateTime ldt;
        if (lt != null) {
            ldt = LocalDate.parse(rec.get(0), dtf).atTime(lt);
        } else {
            ldt = LocalDateTime.parse(rec.get(0), dtf);
        }

        result.append(ldt, values);
    }

    if (header) {
        Map<String, Integer> headerMap = csv.getHeaderMap();
        result.clearNames();
        for (Map.Entry<String, Integer> me : headerMap.entrySet()) {
            if (me.getValue() > 0)
                result.setName(me.getKey(), me.getValue() - 1);
        }
    }

    return result;
}
From source file:org.apache.nifi.csv.CSVRecordSource.java
public CSVRecordSource(final InputStream in, final PropertyContext context) throws IOException {
    final String charset = context.getProperty(CSVUtils.CHARSET).getValue();

    final Reader reader;
    try {
        reader = new InputStreamReader(new BOMInputStream(in), charset);
    } catch (UnsupportedEncodingException e) {
        throw new ProcessException(e);
    }

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader().withTrim();
    final CSVParser csvParser = new CSVParser(reader, csvFormat);
    fieldNames = Collections.unmodifiableList(new ArrayList<>(csvParser.getHeaderMap().keySet()));
    csvRecordIterator = csvParser.iterator();
}
From source file:org.apache.nifi.processors.csv.ExtractCSVHeader.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> attrs = new HashMap<>();
    final AtomicInteger headerLength = new AtomicInteger(0);

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            // TODO expose the charset property?
            LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
            if (iterator.hasNext()) {
                lineFound.set(true);
                final String header = iterator.nextLine();

                final String format = context.getProperty(PROP_FORMAT).getValue();
                final String delimiter = context.getProperty(PROP_DELIMITER)
                        .evaluateAttributeExpressions(original).getValue();
                final String prefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                        .evaluateAttributeExpressions(original).getValue();

                attrs.put(prefix + ATTR_HEADER_ORIGINAL, header);

                // TODO validate delimiter in the callback first
                final CSVFormat csvFormat = buildFormat(format, delimiter,
                        true, // we assume first line is the header
                        null); // no custom header
                final CSVParser parser = csvFormat.parse(new StringReader(header));
                final Map<String, Integer> headers = parser.getHeaderMap();
                final int columnCount = headers.size();
                attrs.put(prefix + ATTR_HEADER_COLUMN_COUNT, String.valueOf(columnCount));
                for (Map.Entry<String, Integer> h : headers.entrySet()) {
                    // CSV columns are 1-based in Excel
                    attrs.put(prefix + (h.getValue() + 1), h.getKey());
                }

                // strip the header and send to the 'content' relationship
                if (StringUtils.isNotBlank(header)) {
                    int hLength = header.length();
                    // move past the new line if there are more lines
                    if (original.getSize() > hLength + 1) {
                        hLength++;
                    }
                    headerLength.set(hLength);
                }
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, attrs);
        int offset = headerLength.get();
        if (offset > 0) {
            FlowFile contentOnly = session.clone(ff, offset, original.getSize() - offset);
            session.transfer(contentOnly, REL_CONTENT);
        }
        session.transfer(ff, REL_ORIGINAL);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
From source file:org.apache.nifi.processors.ParseCSV.ParseCSV.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final Charset charset = Charset.defaultCharset();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // TODO implement
    final Map<String, String> attributes = new LinkedHashMap<>();
    final String format = context.getProperty(FORMAT).getValue();
    final boolean create_attributes = Boolean.parseBoolean(context.getProperty(CREATE_ATTRIBUTES).getValue());
    final char delimiter = context.getProperty(DELIMITER).getValue().charAt(0);
    final boolean with_header = Boolean.parseBoolean(context.getProperty(WITH_HEADER).getValue());
    final String output_format = context.getProperty(OUTPUT_FORMAT).getValue();
    final String custom_header = context.getProperty(CUSTOM_HEADER).getValue();
    final String column_mask = context.getProperty(COLUMN_MASK).getValue();
    final String column_encrypt = context.getProperty(COLUMN_ENCRYPT).getValue();
    final String column_tokenize = context.getProperty(COLUMN_TOKENIZE).getValue();
    final String tokenize_unique_identifier = context.getProperty(TOKENIZE_UNQIUE_IDENTIFIER).getValue();
    final String tokenized_ouput = context.getProperty(TOKENIZED_OUTPUT).getValue();
    final String encryptionKey = "Bar12345Bar12345";
    final String static_schema = context.getProperty(STATIC_SCHEMA).getValue();

    // new flowfile here
    final org.apache.nifi.util.ObjectHolder<FlowFile> holder = new org.apache.nifi.util.ObjectHolder<>(null);

    flowFile = session.write(flowFile, new StreamCallback() {
        @Override
        public void process(InputStream inputStream, OutputStream outputStream) throws IOException {
            CSVFormat csvFormat = buildFormat(format, delimiter, with_header, custom_header);
            CSVParser csvParser = new CSVParser(new InputStreamReader(inputStream, charset), csvFormat);
            CSVPrinter csvPrinter = new CSVPrinter(new OutputStreamWriter(outputStream, charset), csvFormat);

            String[] headerArray;
            ArrayList<String> columnMaskList = new ArrayList<>();
            ArrayList<String> columnEncryptList = new ArrayList<>();
            ArrayList<String> columnTokenizeList = new ArrayList<>();
            List<String> maskValueHolder = new LinkedList<>();
            FlowFile tokenized = session.create();

            // print header if needed
            if (custom_header != null && output_format.equals("CSV") && static_schema == null) {
                csvPrinter.printRecord(custom_header);
                headerArray = custom_header.split(",");
            } else if (static_schema != null && custom_header == null) {
                csvPrinter.printRecord(static_schema.replace("\"", ""));
                headerArray = static_schema.split(",");
            } else {
                headerArray = csvParser.getHeaderMap().keySet().toArray(new String[0]);
                csvPrinter.printRecord((Object[]) headerArray);
            }

            if (column_mask != null) {
                columnMaskList = new ArrayList<>(Arrays.asList(column_mask.replace("\"", "").split(",")));
            }
            if (column_encrypt != null) {
                columnEncryptList = new ArrayList<>(Arrays.asList(column_encrypt.split(",")));
            }
            if (column_tokenize != null) {
                columnTokenizeList = new ArrayList<>(Arrays.asList(column_tokenize.split(",")));
            }

            // loop through records and print
            for (final CSVRecord record : csvParser) {
                // generate attributes if required per record
                if (create_attributes) {
                    for (int i = 0; i < headerArray.length; i++) {
                        attributes.put(headerArray[i] + "." + record.getRecordNumber(), record.get(i));
                    }
                }

                // check masked columns
                if (column_mask != null || column_encrypt != null) {
                    // loop through the header array and match user-requested mask columns
                    for (int i = 0; i < headerArray.length; i++) {
                        if (columnMaskList.contains(headerArray[i])) {
                            // set mask
                            maskValueHolder.add(mask(record.get(i)));

                            // construct tokenization row for external DB store
                            if (columnTokenizeList.contains(headerArray[i])) {
                                final String tokenizedRow = tokenizationOut(tokenized_ouput, headerArray[i],
                                        tokenize_unique_identifier, mask(record.get(i)), record.get(i),
                                        Long.toString(record.getRecordNumber()));
                                tokenized = session.append(tokenized, new OutputStreamCallback() {
                                    @Override
                                    public void process(OutputStream outputStream) throws IOException {
                                        outputStream.write(tokenizedRow.getBytes());
                                    }
                                });
                            }
                        } else if (columnEncryptList.contains(headerArray[i])) {
                            // encrypt
                            maskValueHolder.add(new String(Encrypt(record.get(i), encryptionKey), "UTF-8"));
                        } else {
                            // no mask
                            maskValueHolder.add(record.get(i));
                        }
                    }

                    csvPrinter.printRecord(maskValueHolder);
                    // clear mask column holder
                    maskValueHolder.clear();
                } else {
                    // no masking or encryption required, print record
                    switch (output_format) {
                    case "CSV":
                        List<String> items = Arrays.asList(static_schema.split(","));
                        StringBuilder test = new StringBuilder();
                        for (int i = 0; i < items.size(); i++) {
                            test.append(record.get(items.get(i)));
                            if (i < items.size() - 1) {
                                test.append(',');
                            }
                        }
                        // strip leading/trailing quotes (replaceAll, not replace, so the regex applies)
                        csvPrinter.printRecord(test.toString().replaceAll("^\"|\"$", ""));
                        break;
                    case "JSON":
                        String json = new ObjectMapper().writer().withDefaultPrettyPrinter()
                                .writeValueAsString(record.toMap()) + "\n";
                        if (json.length() > 0) {
                            outputStream.write(json.getBytes());
                        }
                        break;
                    case "XML":
                        outputStream.write(new XmlMapper().writeValueAsString(record.toMap()).getBytes());
                        break;
                    }
                }
            }

            csvPrinter.flush();
            csvPrinter.close();
            holder.set(tokenized);
        }
    });

    flowFile = session.putAllAttributes(flowFile, attributes);
    session.transfer(flowFile, RELATIONSHIP_SUCCESS);
    session.transfer(holder.get(), RELATIONSHIP_TOKENIZED);
}
From source file:org.apache.phoenix.util.CSVCommonsLoader.java
private List<ColumnInfo> buildColumnInfoList(CSVParser parser) throws SQLException {
    List<String> columns = this.columns;
    switch (headerSource) {
    case FROM_TABLE:
        System.out.println(String.format("csv columns from database."));
        break;
    case IN_LINE:
        columns = new ArrayList<String>();
        for (String colName : parser.getHeaderMap().keySet()) {
            columns.add(colName); // iterates in column order
        }
        System.out.println(String.format("csv columns from header line. length=%s, %s", columns.size(),
                buildStringFromList(columns)));
        break;
    case SUPPLIED_BY_USER:
        System.out.println(String.format("csv columns from user. length=%s, %s", columns.size(),
                buildStringFromList(columns)));
        break;
    default:
        throw new IllegalStateException("parser has unknown column source.");
    }
    return generateColumnInfo(conn, tableName, columns, isStrict);
}
From source file:org.cast.cwm.service.UserSpreadsheetReader.java
/**
 * Read a spreadsheet of user information and generate potential users.
 * Returns true if all was successful and users could be created as specified.
 *
 * This method does NOT modify the datastore.
 *
 * @param stream the input stream of CSV data
 * @return true if no errors were encountered
 */
@Override
public boolean readInput(InputStream stream) {
    potentialUsers = new ArrayList<PotentialUserSave>();
    potentialSites = new HashMap<String, Site>();
    potentialPeriods = new HashMap<Site, Map<String, Period>>();

    CSVParser parser;
    try {
        parser = CSVFormat.EXCEL.withHeader().withIgnoreEmptyLines().withIgnoreSurroundingSpaces()
                .parse(new InputStreamReader(new BOMInputStream(stream), "UTF-8"));
    } catch (IOException e) {
        log.error(e.getMessage());
        globalError = e.getMessage();
        return false;
    }

    // Make our own secondary mapping of header names to fields, by
    // lowercasing and removing spaces from all header names
    headerMap = parser.getHeaderMap();
    for (String hdr : new HashSet<String>(headerMap.keySet())) {
        String normalized = hdr.toLowerCase().replaceAll("\\s", "");
        if (!normalized.equals(hdr)) {
            headerMap.put(normalized, headerMap.get(hdr));
        }
    }

    globalError = checkRequiredHeaders(headerMap);
    if (!Strings.isEmpty(globalError))
        return false;

    // Read the CSV file, create PotentialUserSave objects, record error messages,
    // and add to the potentialUsers list
    try {
        boolean errors = false; // have errors been encountered?
        for (CSVRecord record : parser) {
            try {
                User user = createUserObject(record);
                String messages = populateUserObject(user, record);
                if (Strings.isEmpty(messages))
                    messages = validateUser(user);

                // Add a PotentialUserSave to the list.
                potentialUsers.add(new PotentialUserSave(modelProvider.modelOf(user), messages, record));
                if (!Strings.isEmpty(messages))
                    errors = true;
            } catch (ArrayIndexOutOfBoundsException e) {
                // This can happen if the last row is missing values;
                // Excel doesn't fill them out to the last column
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Data missing from CSV.\n", record));
                errors = true;
            } catch (Exception e) {
                e.printStackTrace();
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Error: " + e, record));
                errors = true;
            }
        }

        // If the CSV file has only one line, it is either empty or has unrecognized LF/CR values.
        if (parser.getCurrentLineNumber() == 1) {
            potentialUsers.add(
                    new PotentialUserSave(null, "Empty or Corrupted File. Note: Save as Windows CSV.", null));
            globalError = "Empty or Corrupted File - LF/CR values may be invalid!";
            throw new CharacterCodingException();
        }

        return (!errors);
    } catch (CharacterCodingException e) {
        log.error("Empty or Corrupted File - only 1 line found - CR/LF issue? {}", e.getClass());
        return false;
    }
}
From source file:org.nuxeo.ecm.csv.core.CSVImporterWork.java
protected void doImport(CSVParser parser) {
    log.info(String.format("Importing CSV file: %s", getBlob().getFilename()));

    Map<String, Integer> header = parser.getHeaderMap();
    if (header == null) {
        logError(0, "No header line, empty file?", LABEL_CSV_IMPORTER_EMPTY_FILE);
        return;
    }
    if (!header.containsKey(CSV_NAME_COL)) {
        logError(0, "Missing 'name' column", LABEL_CSV_IMPORTER_MISSING_NAME_COLUMN);
        return;
    }
    hasTypeColumn = header.containsKey(CSV_TYPE_COL);

    try {
        int batchSize = options.getBatchSize();
        Iterable<CSVRecord> it = parser;
        if (computeTotal) {
            try {
                List<CSVRecord> l = parser.getRecords();
                total = l.size();
                it = l;
            } catch (IOException e) {
                log.warn("Could not compute total number of documents to be imported");
            }
        }

        for (CSVRecord record : it) {
            if (record.size() == 0) {
                // empty record
                importLogs.add(new CSVImportLog(getLineNumber(record), Status.SKIPPED, "Empty record",
                        LABEL_CSV_IMPORTER_EMPTY_LINE));
                continue;
            }
            try {
                if (importRecord(record, header)) {
                    docsCreatedCount++;
                    getStore().putParameter(id, "status",
                            new CSVImportStatus(CSVImportStatus.State.RUNNING, docsCreatedCount, total));
                    if (docsCreatedCount % batchSize == 0) {
                        commitOrRollbackTransaction();
                        startTransaction();
                    }
                }
            } catch (NuxeoException e) {
                // try next line
                Throwable unwrappedException = unwrapException(e);
                logError(getLineNumber(parser), "Error while importing line: %s",
                        LABEL_CSV_IMPORTER_ERROR_IMPORTING_LINE, unwrappedException.getMessage());
                log.debug(unwrappedException, unwrappedException);
            }
        }

        try {
            session.save();
        } catch (NuxeoException e) {
            Throwable ue = unwrapException(e);
            logError(getLineNumber(parser), "Unable to save: %s", LABEL_CSV_IMPORTER_UNABLE_TO_SAVE,
                    ue.getMessage());
            log.debug(ue, ue);
        }
    } finally {
        commitOrRollbackTransaction();
        startTransaction();
    }

    log.info(String.format("Done importing CSV file: %s", getBlob().getFilename()));
}
From source file:org.nuxeo.ecm.user.center.profile.UserProfileImporter.java
public void doImport(CoreSession session, CSVParser parser, UserProfileService userProfileService)
        throws IOException {
    log.info(String.format("Importing CSV file: %s", dataFileName));

    DocumentType docType = Framework.getLocalService(SchemaManager.class)
            .getDocumentType(UserProfileConstants.USER_PROFILE_DOCTYPE);
    if (docType == null) {
        log.error("The type " + UserProfileConstants.USER_PROFILE_DOCTYPE + " does not exist");
        return;
    }

    Map<String, Integer> header = parser.getHeaderMap();
    if (header == null) {
        // empty file?
        log.error("No header line, empty file?");
        return;
    }

    // find the index for the required name and type values
    Integer nameIndex = header.get(UserProfileImporter.USER_PROFILE_IMPORTER_USERNAME_COL);
    if (nameIndex == null) {
        log.error("Missing 'username' column");
        return;
    }

    long docsUpdatedCount = 0;
    totalRecords = parser.getRecordNumber();
    try {
        int batchSize = config.getBatchSize();
        long lineNumber = 0;

        for (CSVRecord record : parser.getRecords()) {
            lineNumber++;
            currentRecord = lineNumber;
            try {
                if (importLine(record, lineNumber, nameIndex, docType, session, userProfileService, header)) {
                    docsUpdatedCount++;
                    if (docsUpdatedCount % batchSize == 0) {
                        commitOrRollbackTransaction();
                        startTransaction();
                    }
                }
            } catch (NuxeoException e) {
                // try next line
                Throwable unwrappedException = unwrapException(e);
                logImportError(lineNumber, "Error while importing line: %s", unwrappedException.getMessage());
                log.debug(unwrappedException, unwrappedException);
            }
        }

        session.save();
    } finally {
        commitOrRollbackTransaction();
        startTransaction();
    }

    log.info(String.format("Done importing %s entries from CSV file: %s", docsUpdatedCount, dataFileName));
}
From source file:org.openlmis.fulfillment.Resource2Db.java
Pair<List<String>, List<Object[]>> resourceCsvToBatchedPair(final Resource resource) throws IOException {
    XLOGGER.entry(resource.getDescription());

    // parse CSV
    try (InputStreamReader isReader = new InputStreamReader(
            new BOMInputStream(resource.getInputStream(), ByteOrderMark.UTF_8))) {
        CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").parse(isReader);

        // read header row
        MutablePair<List<String>, List<Object[]>> readData = new MutablePair<>();
        readData.setLeft(new ArrayList<>(parser.getHeaderMap().keySet()));
        XLOGGER.info("Read header: " + readData.getLeft());

        // read data rows
        List<Object[]> rows = new ArrayList<>();
        for (CSVRecord record : parser.getRecords()) {
            if (!record.isConsistent()) {
                throw new IllegalArgumentException("CSV record inconsistent: " + record);
            }

            List<String> theRow = IteratorUtils.toList(record.iterator());
            rows.add(theRow.toArray());
        }
        readData.setRight(rows);

        XLOGGER.exit("Records read: " + readData.getRight().size());
        return readData;
    }
}
From source file:permafrost.tundra.data.IDataCSVParser.java
/**
 * Returns an IData representation of the CSV data in the given input stream.
 *
 * @param inputStream The input stream to be decoded.
 * @param charset     The character set to use.
 * @return            An IData representation of the given input stream data.
 * @throws IOException If there is a problem reading from the stream.
 */
@Override
public IData decode(InputStream inputStream, Charset charset) throws IOException {
    if (inputStream == null)
        return null;

    Reader reader = new InputStreamReader(inputStream, CharsetHelper.normalize(charset));
    CSVFormat format = CSVFormat.DEFAULT.withHeader().withDelimiter(delimiter).withNullString("");
    CSVParser parser = format.parse(reader);

    Set<String> keys = parser.getHeaderMap().keySet();
    List<IData> list = new ArrayList<IData>();

    for (CSVRecord record : parser) {
        IData document = IDataFactory.create();
        IDataCursor cursor = document.getCursor();
        for (String key : keys) {
            if (record.isSet(key)) {
                String value = record.get(key);
                if (value != null)
                    IDataUtil.put(cursor, key, value);
            }
        }
        cursor.destroy();
        list.add(document);
    }

    IData output = IDataFactory.create();
    IDataCursor cursor = output.getCursor();
    IDataUtil.put(cursor, "recordWithNoID", list.toArray(new IData[list.size()]));

    return output;
}