Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package it.ecubecenter.processors.sentiment; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.components.Validator; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.*; import org.apache.commons.io.IOUtils; import org.apache.nifi.annotation.behavior.ReadsAttribute; import org.apache.nifi.annotation.behavior.ReadsAttributes; import org.apache.nifi.annotation.behavior.SideEffectFree; import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.behavior.WritesAttributes; import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.io.InputStreamCallback; import java.io.IOException; import java.io.InputStream; import java.util.*; import java.util.concurrent.atomic.AtomicReference; /** * This processor performs a sentiment analysis on the attribute specified (or the content of the Flow File if * no attribute is provided). The result of the analysis is returned in the attributes X.sentiment.category and X.sentiment.sentences.scores, * where X is the name of the attribute to be analyzed * @author Marco Gaido, eCube srl (gaido@ecubecenter.it) * */ @SideEffectFree @Tags({ "sentiment", "analysis", "text" }) @CapabilityDescription("This processor performs a sentiment analysis on the attribute specified (or the content of the Flow File if " + "no attribute is provided). The result of the analysis is returned in the attributes X.sentiment.category and X.sentiment.sentences.scores, " + "where X is the name of the attribute to be analyzed.") @ReadsAttributes({ @ReadsAttribute(attribute = "the attribute specified in the proper property", description = "It must contain a text to be analyzed") }) @WritesAttributes({ @WritesAttribute(attribute = "X.sentiment.category", description = "The overall sentiment category of the text. " + "It can be \"Very Negative\", \"Negative\", \"Neutral\", \"Positive\" and \"Very Positive\"."), @WritesAttribute(attribute = "X.sentiment.sentences.scores", description = "The detailed scores for each sentence in the text to be analyzed.") }) public class SentimentAnalyzer extends AbstractProcessor { public static final PropertyDescriptor LANGUAGE_PROPERTY = new PropertyDescriptor.Builder().name("Language") .description( "The language of the content to be analyzed (as of now only \"en\", i.e. Engligh, is available).") .required(true).defaultValue("en").addValidator(new Validator() { @Override public ValidationResult validate(final String subject, final String value, final ValidationContext context) { return new ValidationResult.Builder().subject(subject).input(value) .valid(value != null && value.equals("en")) .explanation(subject + " can be only \"en\".").build(); } }).build(); public static final PropertyDescriptor ATTRIBUTE_TO_ANALYZE_PROPERTY = new PropertyDescriptor.Builder() .name("Attribute to analyze") .description( "The attribute to analyze for the sentiment analysis. If it is empty it will use the content of the flow file.") .required(true).defaultValue("").addValidator(new Validator() { @Override public ValidationResult validate(final String subject, final String value, final ValidationContext context) { return new ValidationResult.Builder().subject(subject).input(value).valid(true).explanation( "Any value is allowed, but it should be empty or contain a valid attribute name.") .build(); } }).build(); public static final Relationship SUCCESS_RELATIONSHIP = new Relationship.Builder().name("SUCCESS") .description("Output relationship containing the result of the sentiment analysis.").build(); public static final Relationship FAILURE_RELATIONSHIP = new Relationship.Builder().name("FAILURE").description( "Output relationship if a failure occours, e.g. the attribute specified doesn't exitst or it is empty.") .build(); private List<PropertyDescriptor> descriptors; private Set<Relationship> relationships; @Override protected void init(final ProcessorInitializationContext context) { final List<PropertyDescriptor> descriptors = new ArrayList<>(); descriptors.add(LANGUAGE_PROPERTY); descriptors.add(ATTRIBUTE_TO_ANALYZE_PROPERTY); this.descriptors = Collections.unmodifiableList(descriptors); final Set<Relationship> relationships = new HashSet<>(); relationships.add(SUCCESS_RELATIONSHIP); relationships.add(FAILURE_RELATIONSHIP); this.relationships = Collections.unmodifiableSet(relationships); } @Override public Set<Relationship> getRelationships() { return this.relationships; } @Override public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { return descriptors; } @OnScheduled public void onScheduled(final ProcessContext context) { } @Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final ComponentLog log = getLogger(); final AtomicReference<String> atomicStringToAnalyze = new AtomicReference<>(); FlowFile flowFile = session.get(); if (flowFile == null) { return; } String attributeToBeUsed = context.getProperty(ATTRIBUTE_TO_ANALYZE_PROPERTY).getValue(); if (attributeToBeUsed == null || attributeToBeUsed.equals("")) { attributeToBeUsed = ""; log.info("Start reading the flow file content in order to perform the sentiment analysis."); session.read(flowFile, new InputStreamCallback() { @Override public void process(InputStream in) throws IOException { atomicStringToAnalyze.set(IOUtils.toString(in)); } }); } else { log.info("Getting the content of attribute " + attributeToBeUsed + "in order to perform the sentiment analysis."); atomicStringToAnalyze.set(flowFile.getAttribute(attributeToBeUsed)); } String stringToAnalyze = atomicStringToAnalyze.get(); if (stringToAnalyze == null || stringToAnalyze.equals("")) { log.warn("The attribute to be analyzed doesn't exist or it is empty."); session.transfer(flowFile, FAILURE_RELATIONSHIP); return; } SentimentModel model = SentimentModel.getInstance(); List<double[]> sentiments = model.getSentencesSentiment(stringToAnalyze); flowFile = session.putAttribute(flowFile, attributeToBeUsed + ".sentiment.category", SentimentModel.getOverallSentiment(sentiments)); flowFile = session.putAttribute(flowFile, attributeToBeUsed + ".sentiment.sentences.scores", stringifyListOfSentiments(sentiments)); session.transfer(flowFile, SUCCESS_RELATIONSHIP); } private static String stringifyListOfSentiments(List<double[]> sentiments) { StringBuilder sb = new StringBuilder("["); for (double[] sent : sentiments) { sb.append("{\"Very Negative\":").append(sent[SentimentModel.VERY_NEGATIVE]).append(","); sb.append("\"Negative\":").append(sent[SentimentModel.NEGATIVE]).append(","); sb.append("\"Neutral\":").append(sent[SentimentModel.NEUTRAL]).append(","); sb.append("\"Positive\":").append(sent[SentimentModel.POSITIVE]).append(","); sb.append("\"Very Positive\":").append(sent[SentimentModel.VERY_POSITIVE]).append("},"); } sb.setCharAt(sb.length() - 1, ']'); return sb.toString(); } }