Java tutorial: Apache NiFi's FetchGridFS processor, which retrieves files from a MongoDB GridFS bucket
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.nifi.processors.mongodb.gridfs;

import com.mongodb.client.MongoCursor;
import com.mongodb.client.gridfs.GridFSBucket;
import com.mongodb.client.gridfs.model.GridFSFile;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.mongodb.MongoDBClientService;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.JsonValidator;
import org.apache.nifi.processors.mongodb.QueryHelper;
import org.apache.nifi.util.StringUtils;
import org.bson.Document;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@WritesAttributes(@WritesAttribute(attribute = "gridfs.file.metadata", description = "The custom metadata stored with a file is attached to this property if it exists."))
@Tags({ "fetch", "gridfs", "mongo" })
@CapabilityDescription("Retrieves one or more files from a GridFS bucket by file name or by a user-defined query.")
public class FetchGridFS extends AbstractGridFSProcessor implements QueryHelper {

    static final String METADATA_ATTRIBUTE = "gridfs.file.metadata";

    static final PropertyDescriptor QUERY = new PropertyDescriptor.Builder()
            .name("gridfs-query")
            .displayName("Query")
            .description("A valid MongoDB query to use to fetch one or more files from GridFS.")
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .addValidator(JsonValidator.INSTANCE)
            .required(false)
            .build();

    static final Relationship REL_ORIGINAL = new Relationship.Builder()
            .name("original")
            .description("The original input flowfile goes to this relationship if the query does not cause an error")
            .build();

    static final List<PropertyDescriptor> PROPERTY_DESCRIPTORS;
    static final Set<Relationship> RELATIONSHIP_SET;

    static {
        List<PropertyDescriptor> _temp = new ArrayList<>();
        _temp.addAll(PARENT_PROPERTIES);
        _temp.add(FILE_NAME);
        _temp.add(QUERY);
        _temp.add(QUERY_ATTRIBUTE);
        _temp.add(OPERATION_MODE);
        PROPERTY_DESCRIPTORS = Collections.unmodifiableList(_temp);

        Set<Relationship> _rels = new HashSet<>();
        _rels.addAll(PARENT_RELATIONSHIPS);
        _rels.add(REL_ORIGINAL);
        RELATIONSHIP_SET = Collections.unmodifiableSet(_rels);
    }

    @Override
    public Set<Relationship> getRelationships() {
        return RELATIONSHIP_SET;
    }

    @Override
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return PROPERTY_DESCRIPTORS;
    }

    private String getQuery(ProcessSession session, ProcessContext context, FlowFile input) throws IOException {
        String queryString;
        if (context.getProperty(FILE_NAME).isSet()) {
            String fileName = context.getProperty(FILE_NAME).evaluateAttributeExpressions(input).getValue();
            queryString = String.format("{ \"filename\": \"%s\"}", fileName);
        } else if (context.getProperty(QUERY).isSet()) {
            queryString = context.getProperty(QUERY).evaluateAttributeExpressions(input).getValue();
        } else {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            session.exportTo(input, out);
            out.close();
            queryString = new String(out.toByteArray());
        }

        return queryString;
    }

    @OnScheduled
    public void onScheduled(ProcessContext context) {
        this.clientService = context.getProperty(CLIENT_SERVICE).asControllerService(MongoDBClientService.class);
    }

    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        FlowFile input = session.get();
        if (input == null) {
            return;
        }

        final String operatingMode = context.getProperty(OPERATION_MODE).getValue();
        final Map<String, String> originalAttributes = input.getAttributes();

        String queryStr;
        try {
            queryStr = getQuery(session, context, input);
            if (StringUtils.isEmpty(queryStr)) {
                getLogger().error("No query could be found or built from the supplied input.");
                session.transfer(input, REL_FAILURE);
                return;
            }
        } catch (IOException ex) {
            getLogger().error("No query could be found from supplied input", ex);
            session.transfer(input, REL_FAILURE);
            return;
        }

        Document query = Document.parse(queryStr);

        try {
            final GridFSBucket bucket = getBucket(input, context);
            final String queryPtr = queryStr;
            final FlowFile parent = operatingMode.equals(MODE_ONE_COMMIT.getValue()) ? input : null;

            MongoCursor it = bucket.find(query).iterator();
            if (operatingMode.equals(MODE_MANY_COMMITS.getValue())) {
                session.transfer(input, REL_ORIGINAL);
                input = null;
            }

            while (it.hasNext()) {
                GridFSFile gridFSFile = (GridFSFile) it.next();
                handleFile(bucket, session, context, parent, gridFSFile, queryPtr);

                if (operatingMode.equals(MODE_MANY_COMMITS.getValue())) {
                    session.commit();
                }
            }

            if (input != null) {
                session.transfer(input, REL_ORIGINAL);
            }
        } catch (Exception ex) {
            getLogger().error("An error occurred while trying to run the query.", ex);
            if (input != null && operatingMode.equals(MODE_ONE_COMMIT.getValue())) {
                session.transfer(input, REL_FAILURE);
            } else if (input != null && operatingMode.equals(MODE_MANY_COMMITS.getValue())) {
                final String queryPtr = queryStr;
                FlowFile cloned = session.create();
                cloned = session.putAllAttributes(cloned, originalAttributes);
                cloned = session.write(cloned, out -> out.write(queryPtr.getBytes()));
                session.transfer(cloned, REL_FAILURE);
            }
        }
    }

    private void handleFile(GridFSBucket bucket, ProcessSession session, ProcessContext context, FlowFile parent, GridFSFile input, String query) {
        Map<String, String> attrs = new HashMap<>();
        attrs.put(METADATA_ATTRIBUTE, input.getMetadata() != null ? input.getMetadata().toJson() : "{}");

        if (context.getProperty(QUERY_ATTRIBUTE).isSet()) {
            String key = context.getProperty(QUERY_ATTRIBUTE).evaluateAttributeExpressions(parent).getValue();
            attrs.put(key, query);
        }

        attrs.put(CoreAttributes.FILENAME.key(), input.getFilename());

        FlowFile output = parent != null ? session.create(parent) : session.create();
        output = session.write(output, out -> bucket.downloadToStream(input.getObjectId(), out));
        output = session.putAllAttributes(output, attrs);
        session.transfer(output, REL_SUCCESS);
        session.getProvenanceReporter().receive(output, getTransitUri(input.getObjectId(), output, context));
    }
}
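
To see the underlying MongoDB driver calls in isolation, here is a minimal standalone sketch of the same GridFS lookup the processor performs. It is illustrative only: the connection string mongodb://localhost:27017, the database name "demo", the default "fs" bucket, and the file name "example.txt" are assumptions and do not come from the processor above.

import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.gridfs.GridFSBucket;
import com.mongodb.client.gridfs.GridFSBuckets;
import com.mongodb.client.gridfs.model.GridFSFile;
import org.bson.Document;

import java.io.FileOutputStream;
import java.io.OutputStream;

public class GridFsQueryExample {
    public static void main(String[] args) throws Exception {
        // Assumption: a MongoDB instance is reachable at this URI and its "demo"
        // database contains files in the default GridFS bucket ("fs").
        try (MongoClient client = MongoClients.create("mongodb://localhost:27017")) {
            MongoDatabase db = client.getDatabase("demo");
            GridFSBucket bucket = GridFSBuckets.create(db);

            // The same shape of filter FetchGridFS builds when only "File Name" is set.
            Document query = Document.parse("{ \"filename\": \"example.txt\"}");

            // Iterate over matching GridFS files and stream each one to a local file,
            // mirroring the find()/downloadToStream() calls used in handleFile().
            for (GridFSFile file : bucket.find(query)) {
                try (OutputStream out = new FileOutputStream(file.getFilename())) {
                    bucket.downloadToStream(file.getObjectId(), out);
                }
            }
        }
    }
}

FetchGridFS wraps these same find() and downloadToStream() calls, but instead of writing to local files it streams each file's content into a new FlowFile, copies the stored metadata into the gridfs.file.metadata attribute, and routes the result to the success relationship.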