// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.ConvertJSONtoCSV; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.processor.*; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.io.InputStreamCallback; import org.apache.nifi.processor.io.OutputStreamCallback; import org.apache.nifi.processor.io.StreamCallback; import org.apache.nifi.processor.util.StandardValidators; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.json.JSONTokener; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.*; import java.util.concurrent.atomic.AtomicReference; @Tags({ "JSON, CSV, convert" }) 
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @WritesAttribute(attribute = "mime.type", description = "Sets the mime type to application/csv") @CapabilityDescription("Converts a JSON document to CSV. This processor reads the entire content " + "of incoming FlowFiles into memory in order to perform the conversion. The processor will parse JSON Arrays, JSON Objects " + "and the combination of the two regardless of the level of nesting in the JSON document.") public class ConvertJSONtoCSV extends AbstractProcessor { volatile String delimiter; volatile String removeFields; volatile String emptyFields = ""; public static final AllowableValue INCLUDE_HEADER_TRUE = new AllowableValue("True", "True", "Creates headers for each JSON file."); public static final AllowableValue INCLUDE_HEADER_FALSE = new AllowableValue("False", "False", "Only parses the JSON fields and does not include headers"); public static final PropertyDescriptor DELIMITER = new PropertyDescriptor.Builder().name("CSV Delimiter") .description("Delimiter used for the generated CSV output (Example: , | -)").required(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build(); public static final PropertyDescriptor REMOVE_FIELDS = new PropertyDescriptor.Builder() .name("Remove JSON Fields/Columns") .description( "Comma delimited list of columns that should be removed when parsing JSON and building the CSV. This includes all top level and most granular nested fields/columns. By default with nothing specified every field will be parsed.") .required(false).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build(); public static final PropertyDescriptor EMPTY_FIELDS = new PropertyDescriptor.Builder().name("Empty field value") .description( "During denormalization/flattening of the JSON the value that will be substituted for empty fields values (Example: NULL). 
Defaults to empty string if not specified.") .required(false).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build(); public static final PropertyDescriptor INCLUDE_HEADERS = new PropertyDescriptor.Builder() .name("Include Headers").description("Whether or not to include headers in the CSV output") .required(true).allowableValues(INCLUDE_HEADER_TRUE, INCLUDE_HEADER_FALSE) .defaultValue(INCLUDE_HEADER_TRUE.getValue()).addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final Relationship RELATIONSHIP_SUCCESS = new Relationship.Builder().name("success") .description("Successfully parsing the JSON file to CSV ").build(); public static final Relationship RELATIONSHIP_FAILURE = new Relationship.Builder().name("failure") .description("Failed parsing the JSON file to CSV ").build(); private List<PropertyDescriptor> descriptors; private Set<Relationship> relationships; @Override protected void init(final ProcessorInitializationContext context) { final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>(); descriptors.add(DELIMITER); descriptors.add(REMOVE_FIELDS); descriptors.add(EMPTY_FIELDS); descriptors.add(INCLUDE_HEADERS); this.descriptors = Collections.unmodifiableList(descriptors); final Set<Relationship> relationships = new HashSet<Relationship>(); relationships.add(RELATIONSHIP_SUCCESS); relationships.add(RELATIONSHIP_FAILURE); this.relationships = Collections.unmodifiableSet(relationships); } @Override public Set<Relationship> getRelationships() { return this.relationships; } @Override public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { return descriptors; } @OnScheduled public void onScheduled(final ProcessContext context) { delimiter = context.getProperty(DELIMITER).getValue(); removeFields = context.getProperty(REMOVE_FIELDS).getValue() == null ? 
"" : context.getProperty(REMOVE_FIELDS).getValue(); emptyFields = context.getProperty(EMPTY_FIELDS).getValue(); } @Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final String includeHeaders = context.getProperty(INCLUDE_HEADERS).getValue(); FlowFile flowFile = session.get(); if (flowFile == null) { return; } try { flowFile = session.write(flowFile, new StreamCallback() { @Override public void process(final InputStream inputStream, final OutputStream outputStream) throws IOException { List<Map<String, String>> flatJson; try { flatJson = JSONParser.parseJSON(IOUtils.toString(inputStream, "UTF-8"), getRemoveKeySet()); if (flatJson == null) { throw new IOException( "Unable to parse JSON file. Please check the file contains valid JSON structure"); } } catch (JSONException ex) { throw new JSONException("Unable to parse as JSON appears to be malformed: " + ex); } outputStream.write(CSVGenerator.generateCSV(flatJson, delimiter, emptyFields, includeHeaders) .toString().getBytes()); } }); session.transfer(flowFile, RELATIONSHIP_SUCCESS); } catch (ProcessException | JSONException ex) { getLogger().error("Error converting FlowFile to CSV due to {}", new Object[] { ex.getMessage() }, ex); session.transfer(flowFile, RELATIONSHIP_FAILURE); } } private Set<String> getRemoveKeySet() { Set<String> setRemove = new HashSet<>(); final String[] fields = removeFields.split(","); for (String field : fields) { setRemove.add(field); } return setRemove; } private static class JSONParser { private static final Class<?> JSON_OBJECT = JSONObject.class; private static final Class<?> JSON_ARRAY = JSONArray.class; private static Set<String> removeKeySet = new HashSet<>(); public static List<Map<String, String>> parseJSON(String json, Set<String> removeKeys) throws JSONException { List<Map<String, String>> flatJson = null; removeKeySet = removeKeys; // we don't know if we are dealing with a JSON object or an array // not handling 
this properly causes un-necessary exception to be thrown // instantiate to a generic object and check what we are dealing with Object objJSON = new JSONTokener(json).nextValue(); try { if (objJSON instanceof JSONObject) { JSONObject jsonObject = new JSONObject(json); flatJson = new ArrayList<Map<String, String>>(); flatJson.add(parse(jsonObject)); } else if (objJSON instanceof JSONArray) { flatJson = handleAsArray(json); } } catch (JSONException ex) { throw new JSONException( "JSON might be malformed, please check and make sure it is valid JSON format: " + ex); } return flatJson; } public static Map<String, String> parse(JSONObject jsonObject) { Map<String, String> flatJson = new LinkedHashMap<String, String>(); try { denormalize(jsonObject, flatJson, ""); } catch (Exception ex) { throw ex; } return flatJson; } public static List<Map<String, String>> parse(JSONArray jsonArray) { JSONObject jsonObject = null; List<Map<String, String>> flatJson = new ArrayList<Map<String, String>>(); int length = jsonArray.length(); for (int i = 0; i < length; i++) { jsonObject = jsonArray.getJSONObject(i); Map<String, String> stringMap = parse(jsonObject); flatJson.add(stringMap); } return flatJson; } private static List<Map<String, String>> handleAsArray(String json) throws JSONException { List<Map<String, String>> flatJson = null; try { JSONArray jsonArray = new JSONArray(json); flatJson = parse(jsonArray); } catch (JSONException e) { throw e; } return flatJson; } private static void denormalize(JSONObject obj, Map<String, String> flatJson, String denormalizedColumn) { obj.keySet().removeAll(removeKeySet); Iterator<?> iterator = obj.keys(); String _denormalizedColumn = denormalizedColumn != "" ? denormalizedColumn + "." 
: ""; while (iterator.hasNext()) { String key = iterator.next().toString(); if (obj.get(key).getClass() == JSON_OBJECT) { JSONObject jsonObject = (JSONObject) obj.get(key); denormalize(jsonObject, flatJson, _denormalizedColumn + key); } else if (obj.get(key).getClass() == JSON_ARRAY) { JSONArray jsonArray = (JSONArray) obj.get(key); if (jsonArray.length() < 1) { continue; } denormalize(jsonArray, flatJson, _denormalizedColumn + key); } else { String value = obj.get(key).toString(); if (value != null && !value.equals("null")) { flatJson.put(_denormalizedColumn + key, value); } } } } private static void denormalize(JSONArray obj, Map<String, String> flatJson, String denormalizedColumn) { int length = obj.length(); for (int i = 0; i < length; i++) { if (obj.get(i).getClass() == JSON_ARRAY) { JSONArray jsonArray = (JSONArray) obj.get(i); if (jsonArray.length() < 1) { continue; } denormalize(jsonArray, flatJson, denormalizedColumn + "[" + i + "]"); } else if (obj.get(i).getClass() == JSON_OBJECT) { JSONObject jsonObject = (JSONObject) obj.get(i); denormalize(jsonObject, flatJson, denormalizedColumn + "[" + (i + 1) + "]"); } else { String value = obj.get(i).toString(); if (value != null) { flatJson.put(denormalizedColumn + "[" + (i + 1) + "]", value); } } } } } private static class CSVGenerator { public static StringBuilder generateCSV(List<Map<String, String>> flatJson, String separator, String emptyFields, String includeHeaders) { Set<String> headers = createHeaders(flatJson); StringBuilder csvData = new StringBuilder(); if (INCLUDE_HEADER_TRUE.equals(includeHeaders)) { csvData.append(StringUtils.join(headers.toArray(), separator) + "\n"); } for (Map<String, String> map : flatJson) { csvData.append(getSeperatedColumns(headers, map, separator, emptyFields) + "\n"); } return csvData; } private static String getSeperatedColumns(Set<String> headers, Map<String, String> map, String separator, String emptyFields) { List<String> items = new ArrayList<String>(); for (String 
header : headers) { String value = map.get(header) == null ? "" : map.get(header).replace(",", ""); items.add(value.isEmpty() ? emptyFields : value); } return StringUtils.join(items.toArray(), separator); } private static SortedSet<String> createHeaders(List<Map<String, String>> flatJson) { SortedSet<String> headers = new TreeSet(); for (Map<String, String> map : flatJson) { headers.addAll(map.keySet()); } return headers; } } }