Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.datamelt.nifi.processors; import java.io.IOException; import java.io.InputStream; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.ProcessorInitializationContext; import org.apache.nifi.processor.Relationship; import org.apache.nifi.annotation.behavior.SideEffectFree; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.SeeAlso; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.io.InputStreamCallback; import org.apache.nifi.processor.util.StandardValidators; /** * This Apache Nifi processor will allow to split the incoming content of a flowfile * into separate fields using a defined separator. * <p> * The values of the individual fields will be assigned to flowfile attributes. Each attribute * is named using the defined field prefix plus the positional number of the field. * <p> * A number format can optionally be specified to format the column number. The number format needs * to be according to the Java DecimalFormat class. * <p> * <p> * Example: * <p> * A flow file with following content: * <p> * Peterson, Jenny, New York, USA * <p> * When the field prefix is set to "column_" and the field number format is set to "000" the result will be 4 attributes: * <p> * column_000 = Peterson * column_001 = Jenny * column_002 = New York * column_003 = USA * * @author uwe geercken - last update 2017-03-18 */ @SideEffectFree @Tags({ "CSV", "attributes", "split", "text" }) @CapabilityDescription("Splits the content from a flowfile - must be a single line of CSV data - into individual columns and assigns them to flow file attributes. The resulting attributes are named using the field prefix plus the column position in the CSV row " + "and the value from the content as an attribute.") @SeeAlso(MergeTemplate.class) public class SplitToAttribute extends AbstractProcessor { private List<PropertyDescriptor> properties; private Set<Relationship> relationships; private final ComponentLog logger = getLogger(); // map used to store the attribute name and its value from the content of the flowfile private final Map<String, String> propertyMap = new HashMap<>(); private static final String PROPERTY_ATTRIBUTE_PREFIX_NAME = "Attribute prefix"; private static final String PROPERTY_ATTRIBUTE_PREFIX_DEFAULT = "column_"; private static final String PROPERTY_ATTRIBUTE_PREFIX_ATTRIBUTE_NAME = "attribute.prefix"; private static final String PROPERTY_FIELD_SEPERATOR_NAME = "Field separator"; private static final String PROPERTY_FIELD_NUMBER_NUMBERFORMAT_NAME = "Field Number Format"; private static final String PROPERTY_FIELD_NUMBER_NUMBERFORMAT_DEFAULT = "0000"; private static final String RELATIONSHIP_SUCESS_NAME = "success"; public static final PropertyDescriptor ATTRIBUTE_PREFIX = new PropertyDescriptor.Builder() .name(PROPERTY_ATTRIBUTE_PREFIX_NAME).required(true).defaultValue(PROPERTY_ATTRIBUTE_PREFIX_DEFAULT) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .description( "Specify which String is used to prefix the attributes. If the prefix is e.g. \"column\" then the attributes will be " + "named: \"column0\", \"column1\",\"column2\", etc.") .build(); public static final PropertyDescriptor FIELD_SEPARATOR = new PropertyDescriptor.Builder() .name(PROPERTY_FIELD_SEPERATOR_NAME).required(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .description("Specify the field separator used to split the incomming flow file content.").build(); public static final PropertyDescriptor FIELD_NUMBER_NUMBERFORMAT = new PropertyDescriptor.Builder() .name(PROPERTY_FIELD_NUMBER_NUMBERFORMAT_NAME).required(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .defaultValue(PROPERTY_FIELD_NUMBER_NUMBERFORMAT_DEFAULT) .description( "Specify the number format for the field number. E.g. \"000\" to get a three digit formatting. According to Java DecimalFormat class.") .build(); public static final Relationship SUCCESS = new Relationship.Builder().name(RELATIONSHIP_SUCESS_NAME) .description("The flowfile content was successfully split into individual fields").build(); @Override public void init(final ProcessorInitializationContext context) { List<PropertyDescriptor> properties = new ArrayList<>(); properties.add(ATTRIBUTE_PREFIX); properties.add(FIELD_SEPARATOR); properties.add(FIELD_NUMBER_NUMBERFORMAT); this.properties = Collections.unmodifiableList(properties); Set<Relationship> relationships = new HashSet<>(); relationships.add(SUCCESS); this.relationships = Collections.unmodifiableSet(relationships); } @Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { // get selected number format for the field number String numberFormat = context.getProperty(FIELD_NUMBER_NUMBERFORMAT).getValue(); // for formatting the number final DecimalFormat df; if (numberFormat != null && !numberFormat.trim().equals("")) { df = new DecimalFormat(context.getProperty(FIELD_NUMBER_NUMBERFORMAT).getValue()); } else { df = new DecimalFormat(); } // get the flowfile FlowFile flowFile = session.get(); if (flowFile == null) { return; } session.read(flowFile, new InputStreamCallback() { public void process(InputStream in) throws IOException { try { // get the flow file content String row = IOUtils.toString(in, "UTF-8"); // check that we have data if (row != null && !row.trim().equals("")) { //put the information which field prefix was used to the map propertyMap.put(PROPERTY_ATTRIBUTE_PREFIX_ATTRIBUTE_NAME, context.getProperty(ATTRIBUTE_PREFIX).getValue()); // Split the row into separate fields using the FIELD_SEPARATOR property String[] fields = row.split(context.getProperty(FIELD_SEPARATOR).getValue()); // loop over the fields if (fields != null && fields.length > 0) { for (int i = 0; i < fields.length; i++) { if (fields[i] != null && !fields[i].trim().equals("")) { String field = fields[i]; // remove any lineseparators field = field.replace(System.lineSeparator(), ""); // put into the map of attributes propertyMap.put(context.getProperty(ATTRIBUTE_PREFIX).getValue() + df.format(i), field); } } } } } catch (Exception ex) { ex.printStackTrace(); logger.error("Failed to split data into fields using seperator: [" + context.getProperty(FIELD_SEPARATOR).getValue() + "]"); } } }); // put the map to the flowfile flowFile = session.putAllAttributes(flowFile, propertyMap); // for provenance session.getProvenanceReporter().modifyAttributes(flowFile); // transfer the flowfile session.transfer(flowFile, SUCCESS); } @Override public Set<Relationship> getRelationships() { return relationships; } @Override public List<PropertyDescriptor> getSupportedPropertyDescriptors() { return properties; } }