com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch.java Source code

Java tutorial

Introduction

Here is the source code for com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch.java

Source

/**
 * Copyright (C) 2016 Hurence (support@hurence.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hurence.logisland.processor.elasticsearch;

import com.hurence.logisland.annotation.documentation.CapabilityDescription;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.classloading.PluginProxy;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.processor.ProcessError;
import com.hurence.logisland.record.FieldDictionary;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.service.elasticsearch.multiGet.InvalidMultiGetQueryRecordException;
import com.hurence.logisland.service.elasticsearch.multiGet.MultiGetQueryRecord;
import com.hurence.logisland.service.elasticsearch.multiGet.MultiGetQueryRecordBuilder;
import com.hurence.logisland.service.elasticsearch.multiGet.MultiGetResponseRecord;
import com.hurence.logisland.validator.StandardValidators;
import org.apache.commons.lang3.tuple.ImmutableTriple;
import org.apache.commons.lang3.tuple.Triple;

import java.util.*;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@Tags({ "elasticsearch" })
@CapabilityDescription("Enrich input records with content indexed in elasticsearch using multiget queries.\n"
        + "Each incoming record must be possibly enriched with information stored in elasticsearch. \n"
        + "Each outcoming record holds at least the input record plus potentially one or more fields coming from of one elasticsearch document.")
public class EnrichRecordsElasticsearch extends AbstractElasticsearchProcessor {

    /**
     * Required property "record.key" (non-empty, expression language enabled).
     * NOTE(review): the description speaks of a field name, but process() passes the evaluated
     * value of this property directly to the multiget builder as the document id.
     */
    public static final PropertyDescriptor RECORD_KEY_FIELD = new PropertyDescriptor.Builder().name("record.key")
            .description(
                    "The name of field in the input record containing the document id to use in ES multiget query")
            .required(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true)
            .build();

    /** Required property "es.index" (non-empty, expression language enabled): index queried by the multiget. */
    public static final PropertyDescriptor ES_INDEX_FIELD = new PropertyDescriptor.Builder().name("es.index")
            .description("The name of the ES index to use in multiget query. ").required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true).build();

    /**
     * Optional property "es.type" with default value "default" (expression language enabled).
     * No validator is attached to this descriptor.
     */
    public static final PropertyDescriptor ES_TYPE_FIELD = new PropertyDescriptor.Builder().name("es.type")
            .description("The name of the ES type to use in multiget query.").required(false)
            .defaultValue("default").expressionLanguageSupported(true).build();

    /**
     * Optional property "es.includes.field" with default value "*" (expression language enabled).
     * process() splits the evaluated value on commas; each entry is then interpreted by IncludeFields
     * as "*", a pattern containing "*", a plain field name, or a "recordField:esField" mapping.
     */
    public static final PropertyDescriptor ES_INCLUDES_FIELD = new PropertyDescriptor.Builder()
            .name("es.includes.field").description("The name of the ES fields to include in the record.")
            .required(false).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true)
            .defaultValue("*").build();

    /**
     * Optional property "es.excludes.field" with default value "N/A". Expression language is not enabled
     * on this descriptor; its value is read in init() and split on commas.
     */
    public static final PropertyDescriptor ES_EXCLUDES_FIELD = new PropertyDescriptor.Builder()
            .name("es.excludes.field").description("The name of the ES fields to exclude.").required(false)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).defaultValue("N/A").build();

    // Separator between the two halves of an attribute mapping entry in the includes list.
    private static final String ATTRIBUTE_MAPPING_SEPARATOR = ":";
    // Same separator as a regexp that also swallows whitespace on both sides; used to split mapping entries.
    private static final String ATTRIBUTE_MAPPING_SEPARATOR_REGEXP = "\\s*" + ATTRIBUTE_MAPPING_SEPARATOR + "\\s*";

    // Field names to exclude, filled in init(); stays null when the property value is null or empty.
    private String[] excludesArray = null;

    /**
     * Lists the properties this processor accepts: the Elasticsearch client service, followed by
     * the record key, index, type, includes and excludes descriptors.
     *
     * @return an unmodifiable list of the supported property descriptors
     */
    @Override
    public List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        final List<PropertyDescriptor> descriptors = new ArrayList<>(Arrays.asList(
                ELASTICSEARCH_CLIENT_SERVICE,
                RECORD_KEY_FIELD,
                ES_INDEX_FIELD,
                ES_TYPE_FIELD,
                ES_INCLUDES_FIELD,
                ES_EXCLUDES_FIELD));
        return Collections.unmodifiableList(descriptors);
    }

    /**
     * Initializes the parent processor, then caches the comma-separated value of
     * {@link #ES_EXCLUDES_FIELD} as an array. The array is left untouched when the
     * property value is null or empty.
     *
     * @param context the processing context the property value is read from
     */
    @Override
    public void init(final ProcessContext context) {
        super.init(context);
        final String configuredExcludes = context.getPropertyValue(ES_EXCLUDES_FIELD).asString();
        if (configuredExcludes == null || configuredExcludes.isEmpty()) {
            return;
        }
        excludesArray = configuredExcludes.split("\\s*,\\s*");
    }

    /**
     * Enriches the given records with fields fetched from Elasticsearch through a single multiget call.
     *
     * <p>For every record the key, index, type and includes properties are evaluated. Records for which
     * the key, index or type evaluates to {@code null} are logged and left untouched. All other records
     * contribute one entry to the multiget query. Once the response is available, each retrieved field
     * accepted by the record's include specification is copied onto the record, under its mapped name
     * when an attribute mapping exists for it.</p>
     *
     * @param context the processing context used to evaluate the properties
     * @param records the records to enrich; they are modified in place
     * @return an empty list when {@code records} is empty, otherwise the {@code records} collection itself
     */
    @Override
    public Collection<Record> process(final ProcessContext context, final Collection<Record> records) {
        if (records.isEmpty()) {
            return Collections.emptyList();
        }

        List<Triple<Record, String, IncludeFields>> recordsToEnrich = new ArrayList<>();
        MultiGetQueryRecordBuilder mgqrBuilder = new MultiGetQueryRecordBuilder();

        mgqrBuilder.excludeFields(excludesArray);

        for (Record record : records) {

            String recordKeyName = evaluatePropAsString(record, context, RECORD_KEY_FIELD);
            String indexName = evaluatePropAsString(record, context, ES_INDEX_FIELD);
            String typeName = evaluatePropAsString(record, context, ES_TYPE_FIELD);
            String includesFieldName = evaluatePropAsString(record, context, ES_INCLUDES_FIELD);

            if (recordKeyName == null || indexName == null || typeName == null) {
                getLogger().warn(
                        "Can not request ElasticSearch with "
                                + "index: {}, type: {}, recordKey: {}, record id is :\n{}",
                        new Object[] { indexName, typeName, recordKeyName, record.getId() });
                continue;
            }

            try {
                // Includes :
                String[] includesArray = null;
                if ((includesFieldName != null) && (!includesFieldName.isEmpty())) {
                    includesArray = includesFieldName.split("\\s*,\\s*");
                }
                IncludeFields includeFields = new IncludeFields(includesArray);
                mgqrBuilder.add(indexName, typeName, includeFields.getAttrsToIncludeArray(), recordKeyName);
                recordsToEnrich.add(new ImmutableTriple<>(record,
                        asUniqueKey(indexName, typeName, recordKeyName), includeFields));
            } catch (Throwable t) {
                record.setStringField(FieldDictionary.RECORD_ERRORS, "Can not request ElasticSearch with "
                        + indexName + " " + typeName + " " + recordKeyName);
                getLogger().error(
                        "Can not request ElasticSearch with index: {}, type: {}, recordKey: {}, record id is :\n{}",
                        new Object[] { indexName, typeName, recordKeyName, record.getId() }, t);
            }
        }

        List<MultiGetResponseRecord> multiGetResponseRecords = null;
        try {
            List<MultiGetQueryRecord> mgqrs = mgqrBuilder.build();
            if (mgqrs.isEmpty()) {
                return records;
            }
            multiGetResponseRecords = elasticsearchClientService.multiGet(mgqrs);
        } catch (InvalidMultiGetQueryRecordException e) {
            getLogger().error("error while multiGet elasticsearch", e);
        }

        if (multiGetResponseRecords == null || multiGetResponseRecords.isEmpty()) {
            return records;
        }

        // Index the returned documents by their unique key. A merge function is supplied because
        // Collectors.toMap throws IllegalStateException on duplicate keys, which would otherwise abort
        // the whole batch if the same document shows up twice in the response.
        Map<String, MultiGetResponseRecord> responses = multiGetResponseRecords.stream()
                .collect(Collectors.toMap(EnrichRecordsElasticsearch::asUniqueKey, Function.identity(),
                        (first, duplicate) -> first));

        for (Triple<Record, String, IncludeFields> recordToEnrich : recordsToEnrich) {
            Record outputRecord = recordToEnrich.getLeft();
            IncludeFields includeFields = recordToEnrich.getRight();

            MultiGetResponseRecord responseRecord = responses.get(recordToEnrich.getMiddle());
            if (responseRecord == null || responseRecord.getRetrievedFields() == null) {
                continue;
            }

            // Copy only the retrieved fields that this particular record asked for.
            responseRecord.getRetrievedFields().forEach((fieldName, v) -> {
                if (!includeFields.includes(fieldName)) {
                    return;
                }
                if (includeFields.hasMappingFor(fieldName)) {
                    // An attribute mapping rule applies: store the value under the mapped name.
                    outputRecord.setStringField(includeFields.getAttributeToMap(fieldName), v);
                } else {
                    outputRecord.setStringField(fieldName, v);
                }
            });
        }

        return records;
    }

    /**
     * Evaluates a property against a record and returns the result as a string.
     * Any failure is attached to the record as a conversion error, logged, and reported
     * to the caller as {@code null}.
     *
     * @param record     the record the property expression is evaluated against
     * @param context    the processing context holding the property value
     * @param descriptor the property to evaluate
     * @return the evaluated value as a string, or {@code null} when the evaluation failed
     */
    private String evaluatePropAsString(Record record, ProcessContext context, PropertyDescriptor descriptor) {
        try {
            return context.getPropertyValue(descriptor).evaluate(record).asString();
        } catch (Throwable failure) {
            final String errorMessage = " :Failure in executing EL for property '" + descriptor.getName()
                    + "'. Error: " + failure.getMessage();
            record.addError(ProcessError.RECORD_CONVERSION_ERROR.toString(), errorMessage);

            final Object[] logArguments = new Object[] { descriptor.getName(), record };
            getLogger().error("Cannot interpret EL for property '{}' and record : '{}'", logArguments, failure);
            return null;
        }
    }

    /**
     * Tells whether the array of attributes to include contains at least one attribute mapping,
     * i.e. an entry containing {@link #ATTRIBUTE_MAPPING_SEPARATOR}.
     *
     * @param includesArray the include entries to inspect; may be {@code null}
     * @return {@code true} if at least one non-null entry contains the mapping separator,
     *         {@code false} otherwise (including for a {@code null} array)
     */
    private boolean hasAttributeMapping(String[] includesArray) {
        if (includesArray == null) {
            return false;
        }
        for (String includePattern : includesArray) {
            if (includePattern != null && includePattern.contains(ATTRIBUTE_MAPPING_SEPARATOR)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the lookup key of a multiget response record from its index name, type name and document id.
     *
     * @param mgrr the response record to build the key for
     * @return the key in the form {@code index:type:documentId}
     */
    private static String asUniqueKey(MultiGetResponseRecord mgrr) {
        final String index = mgrr.getIndexName();
        final String type = mgrr.getTypeName();
        final String id = mgrr.getDocumentId();
        return asUniqueKey(index, type, id);
    }

    /**
     * Joins an index name, a type name and a document id with ":" into a single lookup key.
     *
     * @param indexName  the index name
     * @param typeName   the type name
     * @param documentId the document id
     * @return the key in the form {@code index:type:documentId}
     */
    private static String asUniqueKey(String indexName, String typeName, String documentId) {
        return indexName + ":" + typeName + ":" + documentId;
    }

    /**
     * Parsed form of one "includes" specification.
     *
     * <p>Each entry of the input array is one of:</p>
     * <ul>
     *   <li>{@code *}: every field is included;</li>
     *   <li>an entry containing {@code *}: a wildcard pattern where {@code *} stands for any
     *       sequence of characters and every other character is taken literally;</li>
     *   <li>{@code recordField:esField}: the ES field {@code esField} is included and is to be
     *       stored on the record under the name {@code recordField};</li>
     *   <li>anything else: an exact field name.</li>
     * </ul>
     *
     * <p>A {@code null} or empty array means "include everything".</p>
     */
    class IncludeFields {
        // ES field name -> record field name; only allocated when at least one entry contains the separator.
        private Map<String, String> attributesMapping = null;
        boolean hasAttributeMapping = false;
        // ES-side names collected for the query; only used when an attribute mapping is present.
        private Set<String> attrsToIncludeList = null;
        private String[] attrsToIncludeArray = null;

        private boolean containsAll = false;
        private boolean containsSubstring = false;
        private boolean containsEquality = false;

        Set<String> equalityFields = null;
        Map<String, Pattern> substringFields = null;

        /**
         * Parses the include entries.
         *
         * @param includesArray the include entries; {@code null} or empty means "include everything"
         */
        public IncludeFields(String[] includesArray) {
            if (includesArray == null || includesArray.length == 0) {
                containsAll = true;
                this.attrsToIncludeArray = includesArray;
                return;
            }

            // First pass: the mapping structures are only needed if some entry carries a mapping.
            for (String includePattern : includesArray) {
                if (includePattern.contains(ATTRIBUTE_MAPPING_SEPARATOR)) {
                    hasAttributeMapping = true;
                    attrsToIncludeList = new HashSet<>();
                    attributesMapping = new HashMap<>();
                    break;
                }
            }

            // Second pass: classify every entry.
            for (String include : includesArray) {
                if (include.equals("*")) {
                    containsAll = true;
                    if (hasAttributeMapping) {
                        attrsToIncludeList.add(include);
                    }
                } else if (include.contains("*")) {
                    addWildcard(include);
                } else {
                    addExactName(include);
                }
            }

            if (hasAttributeMapping) {
                // Mapping entries are not valid ES field names: send the collected ES-side names instead.
                this.attrsToIncludeArray = attrsToIncludeList.toArray(new String[0]);
            } else {
                this.attrsToIncludeArray = includesArray;
            }
        }

        /*
         * Registers an entry containing '*' as a wildcard pattern.
         */
        private void addWildcard(String include) {
            if (!containsSubstring) {
                substringFields = new HashMap<>();
                containsSubstring = true;
            }
            substringFields.put(include, compileWildcard(include));
            if (hasAttributeMapping) {
                attrsToIncludeList.add(include);
            }
        }

        /*
         * Registers an entry without '*': either a plain field name or a "recordField:esField" mapping.
         */
        private void addExactName(String include) {
            if (!containsEquality) {
                equalityFields = new HashSet<>();
                containsEquality = true;
            }
            if (include.contains(ATTRIBUTE_MAPPING_SEPARATOR)) {
                String[] parts = include.split(ATTRIBUTE_MAPPING_SEPARATOR_REGEXP);
                // Entries that do not split into exactly two parts are ignored.
                if (parts.length == 2) {
                    attrsToIncludeList.add(parts[1]);
                    attributesMapping.put(parts[1], parts[0]);
                    equalityFields.add(parts[1]);
                }
            } else {
                equalityFields.add(include);
                if (hasAttributeMapping) {
                    attrsToIncludeList.add(include);
                }
            }
        }

        /*
         * Translates a wildcard expression into a regular expression: each '*' becomes ".*" and every
         * other run of characters is quoted, so that characters such as '.' or '[' in a field name are
         * matched literally instead of being interpreted as regex syntax.
         */
        private Pattern compileWildcard(String wildcard) {
            StringBuilder regex = new StringBuilder();
            int start = 0;
            int star = wildcard.indexOf('*', start);
            while (star >= 0) {
                if (star > start) {
                    regex.append(Pattern.quote(wildcard.substring(start, star)));
                }
                regex.append(".*");
                start = star + 1;
                star = wildcard.indexOf('*', start);
            }
            if (start < wildcard.length()) {
                regex.append(Pattern.quote(wildcard.substring(start)));
            }
            return Pattern.compile(regex.toString());
        }

        /**
         * @return the field names to request from ES; this is the array given to the constructor
         *         unless an attribute mapping is present, in which case it holds the ES-side names
         */
        public String[] getAttrsToIncludeArray() {
            return attrsToIncludeArray;
        }

        /**
         * @param attr an ES field name
         * @return {@code true} if a mapping to a different record field name exists for {@code attr}
         */
        public boolean hasMappingFor(String attr) {
            return attributesMapping != null && attr != null && attributesMapping.containsKey(attr);
        }

        /**
         * @param attr an ES field name
         * @return the record field name mapped to {@code attr}, or {@code null} if there is none
         */
        public String getAttributeToMap(String attr) {
            if (attributesMapping == null || attr == null) {
                return null;
            }
            return attributesMapping.get(attr);
        }

        /**
         * @return {@code true} if every field is included
         */
        public boolean matchesAll() {
            return containsAll;
        }

        /**
         * @return {@code true} if at least one wildcard pattern was given
         */
        public boolean containsSubstring() {
            return containsSubstring;
        }

        /**
         * @return {@code true} if at least one entry without wildcard was given
         */
        public boolean containsEquality() {
            return containsEquality;
        }

        /**
         * Tells whether a retrieved field is accepted by this specification.
         *
         * @param fieldName the name of the retrieved field
         * @return {@code true} if everything is included, if the name is one of the exact names, or if
         *         the whole name matches one of the wildcard patterns
         */
        public boolean includes(String fieldName) {
            if (containsAll) {
                return true;
            }
            if (fieldName == null) {
                return false;
            }
            if (containsEquality && equalityFields.contains(fieldName)) {
                return true;
            }
            if (containsSubstring) {
                for (Pattern wildcard : substringFields.values()) {
                    // matches() anchors both ends, so "*_id" accepts "user_id" but not "user_id_raw".
                    if (wildcard.matcher(fieldName).matches()) {
                        return true;
                    }
                }
            }
            return false;
        }
    }
}