Example usage for org.apache.commons.csv CSVParser getHeaderMap

Introduction

This page presents example usages of org.apache.commons.csv CSVParser.getHeaderMap(), collected from open source projects.

Prototype

public Map<String, Integer> getHeaderMap() 

Document

Returns a copy of the header map that iterates in column order. Keys are column names; values are zero-based column indices.
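
A minimal, self-contained sketch of the call (the HeaderMapExample class name and the data.csv file name are illustrative):

import java.io.FileReader;
import java.io.Reader;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;

public class HeaderMapExample {
    public static void main(String[] args) throws Exception {
        // Treat the first record of the file as the header row.
        try (Reader in = new FileReader("data.csv");
             CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in)) {
            // getHeaderMap() returns a copy mapping column name -> zero-based index;
            // mutating the returned map does not affect the parser.
            Map<String, Integer> headerMap = parser.getHeaderMap();
            for (Map.Entry<String, Integer> e : headerMap.entrySet()) {
                // entries iterate in column order
                System.out.println(e.getValue() + ": " + e.getKey());
            }
        }
    }
}

Because the returned map is a copy, callers may freely add or normalize keys for their own lookups, as the UserSpreadsheetReader example below does.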

Usage

From source file:net.tradelib.core.Series.java
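
Loads a timestamped CSV file into a Series; when a header row is present, getHeaderMap() supplies the column names (each index is shifted by one to skip the timestamp column).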

public static Series fromCsv(String path, boolean header, DateTimeFormatter dtf, LocalTime lt)
        throws Exception {

    if (dtf == null) {
        if (lt == null)
            dtf = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
        else
            dtf = DateTimeFormatter.ISO_DATE;
    }

    // Parse and import the csv
    CSVFormat csvFmt = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreSurroundingSpaces();
    if (header)
        csvFmt = csvFmt.withHeader();
    CSVParser csv = csvFmt.parse(new BufferedReader(new FileReader(path)));

    int ncols = -1;
    Series result = null;
    double[] values = null;

    for (CSVRecord rec : csv.getRecords()) {
        if (result == null) {
            ncols = rec.size() - 1;
            values = new double[ncols];
            result = new Series(ncols);
        }

        for (int ii = 0; ii < ncols; ++ii) {
            values[ii] = Double.parseDouble(rec.get(ii + 1));
        }

        LocalDateTime ldt;
        if (lt != null) {
            ldt = LocalDate.parse(rec.get(0), dtf).atTime(lt);
        } else {
            ldt = LocalDateTime.parse(rec.get(0), dtf);
        }

        result.append(ldt, values);
    }

    if (header) {
        Map<String, Integer> headerMap = csv.getHeaderMap();
        result.clearNames();
        for (Map.Entry<String, Integer> me : headerMap.entrySet()) {
            if (me.getValue() > 0)
                result.setName(me.getKey(), me.getValue() - 1);
        }
    }

    return result;
}

From source file:org.apache.nifi.csv.CSVRecordSource.java
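
Wraps an input stream as a NiFi record source, capturing the field names from getHeaderMap().keySet() before iterating the records.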

public CSVRecordSource(final InputStream in, final PropertyContext context) throws IOException {
    final String charset = context.getProperty(CSVUtils.CHARSET).getValue();

    final Reader reader;
    try {
        reader = new InputStreamReader(new BOMInputStream(in), charset);
    } catch (UnsupportedEncodingException e) {
        throw new ProcessException(e);
    }

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader().withTrim();
    final CSVParser csvParser = new CSVParser(reader, csvFormat);
    fieldNames = Collections.unmodifiableList(new ArrayList<>(csvParser.getHeaderMap().keySet()));

    csvRecordIterator = csvParser.iterator();
}

From source file:org.apache.nifi.processors.csv.ExtractCSVHeader.java
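
A NiFi processor that parses the first line of a FlowFile as a CSV header and publishes each column name as a FlowFile attribute, using the indices from getHeaderMap().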

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> attrs = new HashMap<>();

    final AtomicInteger headerLength = new AtomicInteger(0);

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            // TODO expose the charset property?
            LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
            if (iterator.hasNext()) {
                lineFound.set(true);
                final String header = iterator.nextLine();

                final String format = context.getProperty(PROP_FORMAT).getValue();
                final String delimiter = context.getProperty(PROP_DELIMITER)
                        .evaluateAttributeExpressions(original).getValue();
                final String prefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                        .evaluateAttributeExpressions(original).getValue();

                attrs.put(prefix + ATTR_HEADER_ORIGINAL, header);
                // TODO validate delimiter in the callback first
                final CSVFormat csvFormat = buildFormat(format, delimiter, true, // we assume first line is the header
                        null); // no custom header
                final CSVParser parser = csvFormat.parse(new StringReader(header));
                final Map<String, Integer> headers = parser.getHeaderMap();
                final int columnCount = headers.size();
                attrs.put(prefix + ATTR_HEADER_COLUMN_COUNT, String.valueOf(columnCount));
                for (Map.Entry<String, Integer> h : headers.entrySet()) {
                    // CSV columns are 1-based in Excel
                    attrs.put(prefix + (h.getValue() + 1), h.getKey());
                }

                // strip the header and send to the 'content' relationship
                if (StringUtils.isNotBlank(header)) {
                    int hLength = header.length();
                    // move past the new line if there are more lines
                    if (original.getSize() > hLength + 1) {
                        hLength++;
                    }
                    headerLength.set(hLength);
                }
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, attrs);

        int offset = headerLength.get();
        if (offset > 0) {
            FlowFile contentOnly = session.clone(ff, offset, original.getSize() - offset);
            session.transfer(contentOnly, REL_CONTENT);
        }

        session.transfer(ff, REL_ORIGINAL);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}

From source file:org.apache.nifi.processors.ParseCSV.ParseCSV.java
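
A NiFi processor that rewrites CSV content with optional masking, encryption, and tokenization; when no custom or static header is configured, the output header comes from getHeaderMap().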

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {

    final Charset charset = Charset.defaultCharset();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // TODO implement
    final Map<String, String> attributes = new LinkedHashMap<>();
    final String format = context.getProperty(FORMAT).getValue();
    final boolean create_attributes = Boolean.parseBoolean(context.getProperty(CREATE_ATTRIBUTES).getValue());
    final char delimiter = context.getProperty(DELIMITER).getValue().charAt(0);
    final boolean with_header = Boolean.parseBoolean(context.getProperty(WITH_HEADER).getValue());
    final String output_format = context.getProperty(OUTPUT_FORMAT).getValue();
    final String custom_header = context.getProperty(CUSTOM_HEADER).getValue();
    final String column_mask = context.getProperty(COLUMN_MASK).getValue();
    final String column_encrypt = context.getProperty(COLUMN_ENCRYPT).getValue();
    final String column_tokenize = context.getProperty(COLUMN_TOKENIZE).getValue();
    final String tokenize_unique_identifier = context.getProperty(TOKENIZE_UNQIUE_IDENTIFIER).getValue();
    final String tokenized_ouput = context.getProperty(TOKENIZED_OUTPUT).getValue();
    final String encryptionKey = "Bar12345Bar12345";
    final String static_schema = context.getProperty(STATIC_SCHEMA).getValue();

    // new flowfile here
    final org.apache.nifi.util.ObjectHolder<FlowFile> holder = new org.apache.nifi.util.ObjectHolder<>(null);

    flowFile = session.write(flowFile, new StreamCallback() {
        @Override
        public void process(InputStream inputStream, OutputStream outputStream) throws IOException {

            CSVFormat csvFormat = buildFormat(format, delimiter, with_header, custom_header);
            CSVParser csvParser = new CSVParser(new InputStreamReader(inputStream, charset), csvFormat);
            CSVPrinter csvPrinter = new CSVPrinter(new OutputStreamWriter(outputStream, charset), csvFormat);
            String headerArray[];

            ArrayList<String> columnMaskList = new ArrayList<>();
            ArrayList<String> columnEncryptList = new ArrayList<String>();
            ArrayList<String> columnTokenizeList = new ArrayList<String>();

            List<String> maskValueHolder = new LinkedList<>();
            FlowFile tokenized = session.create();

            // print header if needed
            if (custom_header != null && output_format.equals("CSV") && static_schema == null) {
                csvPrinter.printRecord(custom_header);
                headerArray = custom_header.split(",");
            } else if (static_schema != null && custom_header == null) {
                csvPrinter.printRecord(static_schema.replace("\"", ""));
                headerArray = static_schema.split(",");
            } else {
                // no custom or static header: take the column names from the
                // parsed header row (getHeaderMap() is null if the format defines no header)
                headerArray = csvParser.getHeaderMap().keySet().toArray(new String[0]);
                csvPrinter.printRecord(headerArray);
            }

            if (column_mask != null) {
                columnMaskList = new ArrayList<>(Arrays.asList(column_mask.replace("\"", "").split(",")));
            }

            if (column_encrypt != null) {
                columnEncryptList = new ArrayList<>(Arrays.asList(column_encrypt.split(",")));
            }

            if (column_tokenize != null) {
                columnTokenizeList = new ArrayList<>(Arrays.asList(column_tokenize.split(",")));
            }

            // loop through records and print
            for (final CSVRecord record : csvParser) {

                Map<String, String> k = record.toMap();

                for (Map.Entry<String, String> konj : k.entrySet()) {
                    //System.out.println(konj.getValue());
                }
                // generate attributes if required per record
                if (create_attributes) {
                    for (int i = 0; i < headerArray.length; i++) {
                        //attributes.put(headerArray[i], record.get(i));
                        attributes.put(headerArray[i] + "." + record.getRecordNumber(), record.get(i));
                    }
                }
                // check masked columns
                if (column_mask != null || column_encrypt != null) {
                    // we have to loop through the header array and match user requested mask columns
                    for (int i = 0; i < headerArray.length; i++) {
                        //System.out.println(headerArray[i] + "." + record.getRecordNumber() + " - " + mask(record.get(i)));

                        if (columnMaskList.contains(headerArray[i])) {
                            // set mask
                            maskValueHolder.add(mask(record.get(i)));

                            // construct tokenization row for external DB store
                            if (columnTokenizeList.contains(headerArray[i])) {
                                final String tokenizedRow;
                                tokenizedRow = tokenizationOut(tokenized_ouput, headerArray[i],
                                        tokenize_unique_identifier, mask(record.get(i)), record.get(i),
                                        Long.toString(record.getRecordNumber()));

                                tokenized = session.append(tokenized, new OutputStreamCallback() {
                                    @Override
                                    public void process(OutputStream outputStream) throws IOException {
                                        outputStream.write(tokenizedRow.getBytes());
                                    }
                                });
                            }
                        } else if (columnEncryptList.contains(headerArray[i])) {
                            // encrypt
                            maskValueHolder.add(new String(Encrypt(record.get(i), encryptionKey), "UTF-8"));
                        } else {
                            // no mask
                            maskValueHolder.add(record.get(i));
                        }
                    }
                    csvPrinter.printRecord(maskValueHolder);
                    // clear mask column holder
                    maskValueHolder.clear();
                } else {
                    // no masking or encryption required, print record
                    switch (output_format) {
                    case "CSV":
                        //csvPrinter.printRecord(record);
                        List<String> items = Arrays.asList(static_schema.split(","));
                        // join the requested columns with commas; an index comparison
                        // avoids the fragile reference check against the last column name
                        StringBuilder test = new StringBuilder();
                        for (int i = 0; i < items.size(); i++) {
                            if (i > 0) {
                                test.append(',');
                            }
                            test.append(record.get(items.get(i)));
                        }

                        // replaceAll is required here: String.replace treats the
                        // pattern literally and would never strip the quotes
                        csvPrinter.printRecord(test.toString().replaceAll("^\"|\"$", ""));
                        break;
                    case "JSON":
                        String json = new ObjectMapper().writer().withDefaultPrettyPrinter()
                                .writeValueAsString(record.toMap()) + "\n";
                        if (json.length() > 0) {
                            outputStream.write(json.getBytes());
                        }

                        //List<Map<?, ?>> data = readObjectsFromCsv(inputStream);
                        //String adis = writeAsJson(data);
                        //outputStream.write(writeAsJson(data).getBytes());
                        break;
                    case "XML":
                        outputStream.write(new XmlMapper().writeValueAsString(record.toMap()).getBytes());
                        break;
                    }
                }
            }
            csvPrinter.flush();
            csvPrinter.close();
            holder.set(tokenized);
        }
    });

    flowFile = session.putAllAttributes(flowFile, attributes);
    session.transfer(flowFile, RELATIONSHIP_SUCCESS);
    session.transfer(holder.get(), RELATIONSHIP_TOKENIZED);
}

From source file:org.apache.phoenix.util.CSVCommonsLoader.java
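
Builds Phoenix column metadata; when the header is taken from the CSV itself, the column names are read from getHeaderMap() in column order.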

private List<ColumnInfo> buildColumnInfoList(CSVParser parser) throws SQLException {
    List<String> columns = this.columns;
    switch (headerSource) {
    case FROM_TABLE:
        System.out.println(String.format("csv columns from database."));
        break;
    case IN_LINE:
        columns = new ArrayList<String>();
        for (String colName : parser.getHeaderMap().keySet()) {
            columns.add(colName); // iterates in column order
        }
        System.out.println(String.format("csv columns from header line. length=%s, %s", columns.size(),
                buildStringFromList(columns)));
        break;
    case SUPPLIED_BY_USER:
        System.out.println(String.format("csv columns from user. length=%s, %s", columns.size(),
                buildStringFromList(columns)));
        break;
    default:
        throw new IllegalStateException("parser has unknown column source.");
    }
    return generateColumnInfo(conn, tableName, columns, isStrict);
}

From source file:org.cast.cwm.service.UserSpreadsheetReader.java
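
Reads a spreadsheet of user data, adding lowercased, space-stripped aliases to the map returned by getHeaderMap() before validating each row.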

/**
 * Read spreadsheet of user information and generate potential users.
 * Returns true if all was successful and users could be created as specified.
 *
 * This method does NOT modify the datastore.
 * 
 * @param stream the input stream of CSV data
 * @return true if no errors encountered.
 */
@Override
public boolean readInput(InputStream stream) {
    potentialUsers = new ArrayList<PotentialUserSave>();
    potentialSites = new HashMap<String, Site>();
    potentialPeriods = new HashMap<Site, Map<String, Period>>();

    CSVParser parser;
    try {
        parser = CSVFormat.EXCEL.withHeader().withIgnoreEmptyLines().withIgnoreSurroundingSpaces()
                .parse(new InputStreamReader(new BOMInputStream(stream), "UTF-8"));
    } catch (IOException e) {
        log.error(e.getMessage());
        globalError = e.getMessage();
        return false;
    }

    // Make our own secondary mapping of header names to fields, by
    // lowercasing and removing spaces from all header names
    headerMap = parser.getHeaderMap();
    for (String hdr : new HashSet<String>(headerMap.keySet())) {
        String normalized = hdr.toLowerCase().replaceAll("\\s", "");
        if (!normalized.equals(hdr)) {
            headerMap.put(normalized, headerMap.get(hdr));
        }
    }

    globalError = checkRequiredHeaders(headerMap);
    if (!Strings.isEmpty(globalError))
        return false;

    // Read the CSV file, create PotentialUserSave objects, record error messages, add to potentialUsers List
    try {
        boolean errors = false; // have errors been encountered?
        for (CSVRecord record : parser) {

            try {
                User user = createUserObject(record);
                String messages = populateUserObject(user, record);
                if (Strings.isEmpty(messages))
                    messages = validateUser(user);

                // Add a PotentialUserSave to the list.
                potentialUsers.add(new PotentialUserSave(modelProvider.modelOf(user), messages, record));
                if (!Strings.isEmpty(messages))
                    errors = true;

            } catch (ArrayIndexOutOfBoundsException e) {
                // This can happen if the last row is missing values; Excel doesn't fill them out to the last column
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Data missing from CSV.\n", record));
                errors = true;
            } catch (Exception e) {
                e.printStackTrace();
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Error: " + e, record));
                errors = true;
            }
        }

        // If CSV file has only one line, it is either empty or has unrecognized LF/CR values.
        if (parser.getCurrentLineNumber() == 1) {
            potentialUsers.add(
                    new PotentialUserSave(null, "Empty or Corrupted File.  Note: Save as Windows CSV.", null));
            globalError = "Empty or Corrupted File - LF/CR values may be invalid!";
            throw new CharacterCodingException();
        }
        return (!errors);

    } catch (CharacterCodingException e) {
        log.error("Empty or Corrupted File - only 1 line found - CR/LF issue?. {}", e.getClass());
        return false;
    }

}

From source file:org.nuxeo.ecm.csv.core.CSVImporterWork.java
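
Imports documents from a CSV file, using getHeaderMap() both to detect an empty file (a null map) and to check for required columns.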

protected void doImport(CSVParser parser) {
    log.info(String.format("Importing CSV file: %s", getBlob().getFilename()));
    Map<String, Integer> header = parser.getHeaderMap();
    if (header == null) {
        logError(0, "No header line, empty file?", LABEL_CSV_IMPORTER_EMPTY_FILE);
        return;
    }
    if (!header.containsKey(CSV_NAME_COL)) {
        logError(0, "Missing 'name' column", LABEL_CSV_IMPORTER_MISSING_NAME_COLUMN);
        return;
    }
    hasTypeColumn = header.containsKey(CSV_TYPE_COL);

    try {
        int batchSize = options.getBatchSize();
        Iterable<CSVRecord> it = parser;
        if (computeTotal) {
            try {
                List<CSVRecord> l = parser.getRecords();
                total = l.size();
                it = l;
            } catch (IOException e) {
                log.warn("Could not compute total number of document to be imported");
            }
        }
        for (CSVRecord record : it) {
            if (record.size() == 0) {
                // empty record
                importLogs.add(new CSVImportLog(getLineNumber(record), Status.SKIPPED, "Empty record",
                        LABEL_CSV_IMPORTER_EMPTY_LINE));
                continue;
            }
            try {
                if (importRecord(record, header)) {
                    docsCreatedCount++;
                    getStore().putParameter(id, "status",
                            new CSVImportStatus(CSVImportStatus.State.RUNNING, docsCreatedCount, total));
                    if (docsCreatedCount % batchSize == 0) {
                        commitOrRollbackTransaction();
                        startTransaction();
                    }
                }
            } catch (NuxeoException e) {
                // try next line
                Throwable unwrappedException = unwrapException(e);
                logError(getLineNumber(parser), "Error while importing line: %s",
                        LABEL_CSV_IMPORTER_ERROR_IMPORTING_LINE, unwrappedException.getMessage());
                log.debug(unwrappedException, unwrappedException);
            }
        }

        try {
            session.save();
        } catch (NuxeoException e) {
            Throwable ue = unwrapException(e);
            logError(getLineNumber(parser), "Unable to save: %s", LABEL_CSV_IMPORTER_UNABLE_TO_SAVE,
                    ue.getMessage());
            log.debug(ue, ue);
        }
    } finally {
        commitOrRollbackTransaction();
        startTransaction();
    }
    log.info(String.format("Done importing CSV file: %s", getBlob().getFilename()));
}

From source file:org.nuxeo.ecm.user.center.profile.UserProfileImporter.java
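
Imports user profiles, looking up the index of the required username column in the map returned by getHeaderMap().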

public void doImport(CoreSession session, CSVParser parser, UserProfileService userProfileService)
        throws IOException {
    log.info(String.format("Importing CSV file: %s", dataFileName));

    DocumentType docType = Framework.getLocalService(SchemaManager.class)
            .getDocumentType(UserProfileConstants.USER_PROFILE_DOCTYPE);
    if (docType == null) {
        log.error("The type " + UserProfileConstants.USER_PROFILE_DOCTYPE + " does not exist");
        return;
    }

    Map<String, Integer> header = parser.getHeaderMap();

    if (header == null) {
        // empty file?
        log.error("No header line, empty file?");
        return;
    }

    // find the index for the required name and type values
    Integer nameIndex = header.get(UserProfileImporter.USER_PROFILE_IMPORTER_USERNAME_COL);
    if (nameIndex == null) {
        log.error("Missing 'username' column");
        return;
    }

    long docsUpdatedCount = 0;
    totalRecords = parser.getRecordNumber();
    try {
        int batchSize = config.getBatchSize();
        long lineNumber = 0;

        for (CSVRecord record : parser.getRecords()) {
            lineNumber++;
            currentRecord = lineNumber;

            try {
                if (importLine(record, lineNumber, nameIndex, docType, session, userProfileService, header)) {
                    docsUpdatedCount++;
                    if (docsUpdatedCount % batchSize == 0) {
                        commitOrRollbackTransaction();
                        startTransaction();
                    }
                }
            } catch (NuxeoException e) {
                // try next line
                Throwable unwrappedException = unwrapException(e);
                logImportError(lineNumber, "Error while importing line: %s", unwrappedException.getMessage());
                log.debug(unwrappedException, unwrappedException);
            }
        }

        session.save();
    } finally {
        commitOrRollbackTransaction();
        startTransaction();
    }
    log.info(String.format("Done importing %s entries from CSV file: %s", docsUpdatedCount, dataFileName));
}

From source file:org.openlmis.fulfillment.Resource2Db.java
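
Converts a CSV resource into a pair of header names and batched data rows; the header list is built from getHeaderMap().keySet().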

Pair<List<String>, List<Object[]>> resourceCsvToBatchedPair(final Resource resource) throws IOException {
    XLOGGER.entry(resource.getDescription());

    // parse CSV
    try (InputStreamReader isReader = new InputStreamReader(
            new BOMInputStream(resource.getInputStream(), ByteOrderMark.UTF_8))) {
        CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").parse(isReader);

        // read header row
        MutablePair<List<String>, List<Object[]>> readData = new MutablePair<>();
        readData.setLeft(new ArrayList<>(parser.getHeaderMap().keySet()));
        XLOGGER.info("Read header: " + readData.getLeft());

        // read data rows
        List<Object[]> rows = new ArrayList<>();
        for (CSVRecord record : parser.getRecords()) {
            if (!record.isConsistent()) {
                throw new IllegalArgumentException("CSV record inconsistent: " + record);
            }

            List<String> theRow = IteratorUtils.toList(record.iterator());
            rows.add(theRow.toArray());
        }
        readData.setRight(rows);

        XLOGGER.exit("Records read: " + readData.getRight().size());
        return readData;
    }
}

From source file:permafrost.tundra.data.IDataCSVParser.java
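
Decodes a CSV input stream into a list of IData documents whose keys are the column names from getHeaderMap().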

/**
 * Returns an IData representation of the CSV data in the given input stream.
 *
 * @param inputStream The input stream to be decoded.
 * @param charset     The character set to use.
 * @return An IData representation of the given input stream data.
 * @throws IOException If there is a problem reading from the stream.
 */
@Override
public IData decode(InputStream inputStream, Charset charset) throws IOException {
    if (inputStream == null)
        return null;

    Reader reader = new InputStreamReader(inputStream, CharsetHelper.normalize(charset));
    CSVFormat format = CSVFormat.DEFAULT.withHeader().withDelimiter(delimiter).withNullString("");
    CSVParser parser = format.parse(reader);

    Set<String> keys = parser.getHeaderMap().keySet();
    List<IData> list = new ArrayList<IData>();

    for (CSVRecord record : parser) {
        IData document = IDataFactory.create();
        IDataCursor cursor = document.getCursor();
        for (String key : keys) {
            if (record.isSet(key)) {
                String value = record.get(key);
                if (value != null)
                    IDataUtil.put(cursor, key, value);
            }
        }
        cursor.destroy();
        list.add(document);
    }

    IData output = IDataFactory.create();
    IDataCursor cursor = output.getCursor();
    IDataUtil.put(cursor, "recordWithNoID", list.toArray(new IData[list.size()]));

    return output;
}