com.hurence.logisland.processor.ModifyId.java Source code

Java tutorial

Introduction

Here is the source code for com.hurence.logisland.processor.ModifyId.java

Source

/**
 * Copyright (C) 2016 Hurence (support@hurence.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hurence.logisland.processor;

import com.google.common.collect.Lists;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.validator.StandardValidators;
import com.hurence.logisland.validator.ValidationContext;
import com.hurence.logisland.validator.ValidationResult;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;

/**
 * Processor that overwrites the id of every incoming record using one of four
 * configurable strategies:
 * <ul>
 *   <li>{@code randomUuid}: a random {@link UUID};</li>
 *   <li>{@code hashFields}: the hex encoded digest of the concatenated values of the configured fields;</li>
 *   <li>{@code fromFields}: a {@link java.util.Formatter} pattern applied to the raw values of the configured fields;</li>
 *   <li>{@code typetimehash}: {@code <record type>-<record time>-<hex digest of the configured fields>}.</li>
 * </ul>
 * The strategy is (re)built from the {@link ProcessContext} by {@link #init(ProcessContext)},
 * which {@link #process(ProcessContext, Collection)} calls before handling each batch.
 *
 * NOTE(review): the hash based builders share a single {@link MessageDigest} instance, which is
 * not safe for concurrent use; this class presumably is driven by one thread at a time - confirm.
 */
@Tags({ "record", "id", "idempotent", "generate", "modify" })
@CapabilityDescription("modify id of records or generate it following defined rules")
//TODO add others tags see others processor
public class ModifyId extends AbstractProcessor {

    private static final long serialVersionUID = -270933070438408174L;

    private static final Logger logger = LoggerFactory.getLogger(ModifyId.class);

    public static final AllowableValue RANDOM_UUID_STRATEGY = new AllowableValue("randomUuid",
            "generate a random uid", "generate a randomUid using java library");

    public static final AllowableValue HASH_FIELDS_STRATEGY = new AllowableValue("hashFields",
            "generate a hash from fields", "generate a hash from fields");

    public static final AllowableValue JAVA_FORMAT_STRING_WITH_FIELDS_STRATEGY = new AllowableValue("fromFields",
            "generate a string from java pattern and fields", "generate a string from java pattern and fields");

    public static final AllowableValue TYPE_TIME_HASH_STRATEGY = new AllowableValue("typetimehash",
            "generate a concatenation of type, time and a hash from fields",
            "generate a concatenation of type, time and a hash from fields (as for generate_hash strategy)");

    /**
     * Selects which of the strategies above is used to build the new id.
     */
    public static final PropertyDescriptor STRATEGY = new PropertyDescriptor.Builder()
            .name("id.generation.strategy").description("the strategy to generate new Id").required(true)
            .allowableValues(RANDOM_UUID_STRATEGY, HASH_FIELDS_STRATEGY, JAVA_FORMAT_STRING_WITH_FIELDS_STRATEGY,
                    TYPE_TIME_HASH_STRATEGY)
            .defaultValue(RANDOM_UUID_STRATEGY.getValue()).build();

    /**
     * Property used only by the hash based strategies: charset used to turn the
     * concatenated field values into bytes before hashing.
     */
    public static final PropertyDescriptor CHARSET_TO_USE_FOR_HASH = new PropertyDescriptor.Builder()
            .name("hash.charset").description("the charset to use to hash id string (e.g. 'UTF-8')").required(true)
            .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR).defaultValue("UTF-8").build();

    /**
     * Property used only by the format strategy: the {@link java.util.Formatter} pattern.
     * It is declared optional here; {@link #customValidate(ValidationContext)} makes it
     * mandatory when the format strategy is selected.
     */
    public static final PropertyDescriptor JAVA_FORMAT_STRING = new PropertyDescriptor.Builder()
            .name("java.formatter.string")
            .description(
                    "the format to use to build id string (e.g. '%4$2s %3$2s %2$2s %1$2s' (see java Formatter)")
            .required(false).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();

    /**
     * Property used only by the format strategy: language tag of the {@link Locale}
     * passed to {@link String#format(Locale, String, Object...)}.
     */
    public static final PropertyDescriptor LANGUAGE_TAG = new PropertyDescriptor.Builder().name("language.tag")
            .description("the language to use to format numbers in string").required(true)
            .addValidator(StandardValidators.LANGUAGE_TAG_VALIDATOR).allowableValues(Locale.getISOLanguages())
            .defaultValue(Locale.ENGLISH.toLanguageTag()).build();

    /**
     * Property used by the hash, format and type-time-hash strategies: comma separated
     * names of the record fields that feed the id.
     */
    public static final PropertyDescriptor FIELDS_TO_USE = new PropertyDescriptor.Builder().name("fields.to.hash")
            .description("the comma separated list of field names (e.g. : 'policyid,date_raw'").required(true)
            .addValidator(StandardValidators.COMMA_SEPARATED_LIST_VALIDATOR)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).defaultValue(FieldDictionary.RECORD_VALUE)
            .build();

    //TODO determines those values dynamically, used this code to determine those
    //    Provider[] providers = Security.getProviders();
    //    for (Provider p : providers) {
    //        String providerStr = String.format("%s/%s/%f\n", p.getName(),
    //                p.getInfo(), p.getVersion());
    //        System.out.println("provider: " + p.getName());
    //        Set<Provider.Service> services = p.getServices();
    //        for (Provider.Service s : services) {
    //            if ("MessageDigest".equals(s.getType())) {
    //                System.out.printf("\t%s//%s//%s\n", s.getType(),
    //                        s.getAlgorithm(), s.getClassName());
    //            }
    //        }
    //    }
    /**
     * Digest algorithm names accepted by {@link #HASH_ALGORITHM}. Exposed as a read-only
     * view so that this shared constant cannot be altered at runtime.
     */
    public static final Set<String> HASH_ALGORITHMS = Collections.unmodifiableSet(new HashSet<String>(
            Arrays.asList("MD2", "MD5", "SHA", "SHA-224", "SHA-256", "SHA-384", "SHA-512")));

    /**
     * Property used only by the hash based strategies: name of the digest algorithm.
     */
    public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder().name("hash.algorithm")
            .description("the algorithme to use to hash id string (e.g. 'SHA-256'").required(true)
            .allowableValues(HASH_ALGORITHMS).addValidator(StandardValidators.HASH_ALGORITHM_VALIDATOR)
            .defaultValue("SHA-256").build();

    /**
     * Strategy currently in use; stays {@code null} until {@link #init(ProcessContext)}
     * has successfully selected one.
     */
    private IdBuilder idBuilder = null;

    /**
     * Adds a cross-property rule to the inherited validation: the format pattern must be
     * set whenever the format strategy is selected.
     *
     * @param context the configuration being validated
     * @return the inherited validation results plus, when applicable, one invalid result
     *         for the missing format pattern
     */
    @Override
    protected Collection<ValidationResult> customValidate(ValidationContext context) {
        final List<ValidationResult> validationResults = new ArrayList<>(super.customValidate(context));
        if (!context.getPropertyValue(STRATEGY).isSet()) {
            return validationResults;
        }
        final Object strategy = context.getPropertyValue(STRATEGY).getRawValue();
        final boolean formatStrategy = strategy.equals(JAVA_FORMAT_STRING_WITH_FIELDS_STRATEGY.getValue());
        if (formatStrategy && !context.getPropertyValue(JAVA_FORMAT_STRING).isSet()) {
            validationResults.add(new ValidationResult.Builder().input(JAVA_FORMAT_STRING.getName())
                    .explanation(String.format("%s must be set when strategy is %s",
                            JAVA_FORMAT_STRING.getName(), strategy))
                    .valid(false).build());
        }
        return validationResults;
    }

    /**
     * Reads the strategy and its parameters from {@code context} and installs the matching
     * {@link IdBuilder}. When the strategy property is not set, or holds a value matching
     * none of the known strategies, the previously installed builder is left untouched.
     *
     * @param context the processor configuration
     * @throws Error if the configured digest algorithm is unknown to the JVM (kept as
     *         {@link Error} so that existing callers observe the same throwable type)
     */
    @Override
    public void init(ProcessContext context) {
        super.init(context);
        if (!context.getPropertyValue(STRATEGY).isSet()) {
            return;
        }
        final Object strategy = context.getPropertyValue(STRATEGY).getRawValue();
        if (strategy.equals(RANDOM_UUID_STRATEGY.getValue())) {
            idBuilder = newRandomUuidBuilder();
        } else if (strategy.equals(HASH_FIELDS_STRATEGY.getValue())) {
            final List<String> fields = parseFieldNames(context.getPropertyValue(FIELDS_TO_USE).asString());
            final MessageDigest digest = newDigest(context);
            final Charset charset = Charset.forName(context.getPropertyValue(CHARSET_TO_USE_FOR_HASH).asString());
            idBuilder = newHashFieldsBuilder(fields, digest, charset);
        } else if (strategy.equals(JAVA_FORMAT_STRING_WITH_FIELDS_STRATEGY.getValue())) {
            final List<String> fields = parseFieldNames(context.getPropertyValue(FIELDS_TO_USE).asString());
            final String format = context.getPropertyValue(JAVA_FORMAT_STRING).asString();
            final Locale locale = Locale.forLanguageTag(context.getPropertyValue(LANGUAGE_TAG).asString());
            idBuilder = newFormatBuilder(fields, format, locale);
        } else if (strategy.equals(TYPE_TIME_HASH_STRATEGY.getValue())) {
            final List<String> fields = parseFieldNames(context.getPropertyValue(FIELDS_TO_USE).asString());
            final MessageDigest digest = newDigest(context);
            final Charset charset = Charset.forName(context.getPropertyValue(CHARSET_TO_USE_FOR_HASH).asString());
            idBuilder = newTypeTimeHashBuilder(fields, digest, charset);
        }
    }

    /**
     * Rebuilds the id strategy from {@code context}, then assigns a new id to every record.
     * This method is best effort and does not propagate failures: they are logged and the
     * batch is returned. A record whose id cannot be built is skipped without preventing
     * the remaining records from being handled.
     *
     * @param context the processor configuration
     * @param records the records to update in place
     * @return the very same {@code records} collection
     */
    @Override
    public Collection<Record> process(ProcessContext context, Collection<Record> records) {
        try {
            init(context);
        } catch (Throwable t) {
            // Broad on purpose: init reports an unknown digest algorithm with an Error.
            logger.error("error while initializing idBuilder", t);
            // The builder cannot be trusted after a failed initialisation.
            return records;
        }

        final IdBuilder builder = idBuilder;
        if (builder == null) {
            logger.error("no id generation strategy could be set up, ids of records are left unchanged");
            return records;
        }
        if (records == null) {
            return records;
        }

        for (Record record : records) {
            try {
                builder.buildId(record);
            } catch (RuntimeException e) {
                // Isolate the failure so the rest of the batch still gets its ids.
                logger.error("error while setting id for records", e);
            }
        }
        return records;
    }

    /**
     * @return the properties understood by this processor, as an unmodifiable list
     */
    @Override
    public List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        final List<PropertyDescriptor> descriptors = new ArrayList<>();
        descriptors.add(STRATEGY);
        descriptors.add(FIELDS_TO_USE);
        descriptors.add(CHARSET_TO_USE_FOR_HASH);
        descriptors.add(HASH_ALGORITHM);
        descriptors.add(JAVA_FORMAT_STRING);
        descriptors.add(LANGUAGE_TAG);

        return Collections.unmodifiableList(descriptors);
    }

    /**
     * Splits a comma separated property value into field names, trimming the whitespace
     * around each name so that {@code "a, b"} designates the fields {@code a} and {@code b}.
     * Positions are preserved because the format strategy maps them to format arguments.
     */
    private static List<String> parseFieldNames(String commaSeparated) {
        final String[] rawNames = commaSeparated.split(",");
        final List<String> names = new ArrayList<>(rawNames.length);
        for (String rawName : rawNames) {
            names.add(rawName.trim());
        }
        return names;
    }

    /**
     * Instantiates the digest named by the {@link #HASH_ALGORITHM} property.
     */
    private static MessageDigest newDigest(ProcessContext context) {
        try {
            return MessageDigest.getInstance(context.getPropertyValue(HASH_ALGORITHM).asString());
        } catch (NoSuchAlgorithmException e) {
            throw new Error(
                    "This error should not happen because the validator should ensure the algorythme exist",
                    e);
        }
    }

    /**
     * Concatenates the string values of the fields present on {@code record} (absent fields
     * are skipped), hashes the result and returns the digest as a hexadecimal string.
     */
    private static String hexDigestOfFields(Record record, List<String> fieldNames, MessageDigest digest,
            Charset charset) {
        final StringBuilder concatenated = new StringBuilder();
        for (String fieldName : fieldNames) {
            if (record.hasField(fieldName)) {
                concatenated.append(record.getField(fieldName).asString());
            }
        }
        // digest(byte[]) updates, completes the hash and resets the digest for the next record.
        return Hex.encodeHexString(digest.digest(concatenated.toString().getBytes(charset)));
    }

    /**
     * Builder for the {@code randomUuid} strategy.
     */
    private static IdBuilder newRandomUuidBuilder() {
        return new IdBuilder() {
            @Override
            public void buildId(Record record) {
                record.setId(UUID.randomUUID().toString());
            }
        };
    }

    /**
     * Builder for the {@code hashFields} strategy: the id is the hex digest of the fields.
     */
    private static IdBuilder newHashFieldsBuilder(final List<String> fieldNames, final MessageDigest digest,
            final Charset charset) {
        return new IdBuilder() {
            @Override
            public void buildId(Record record) {
                record.setId(hexDigestOfFields(record, fieldNames, digest, charset));
            }
        };
    }

    /**
     * Builder for the {@code fromFields} strategy: the raw values of the fields are the
     * arguments of {@code format}. A missing field or a formatting failure is reported as
     * an error on the record, whose id is then left unchanged.
     */
    private static IdBuilder newFormatBuilder(final List<String> fieldNames, final String format,
            final Locale locale) {
        return new IdBuilder() {
            @Override
            public void buildId(Record record) {
                final Object[] arguments = new Object[fieldNames.size()];
                for (int i = 0; i < arguments.length; i++) {
                    final String fieldName = fieldNames.get(i);
                    if (!record.hasField(fieldName)) {
                        record.addError(ProcessError.CONFIG_SETTING_ERROR.getName(),
                                String.format(
                                        "could not build id with format : '%s' \nfields: '%s' \n because "
                                                + "field: '%s' does not exist",
                                        format, fieldNames, fieldName));
                        return;
                    }
                    arguments[i] = record.getField(fieldName).getRawValue();
                }
                try {
                    record.setId(String.format(locale, format, arguments));
                } catch (IllegalFormatException e) {
                    // Illegal pattern syntax, a specifier incompatible with its argument,
                    // too few arguments, ... see the java.util.Formatter specification.
                    record.addError(ProcessError.STRING_FORMAT_ERROR.getName(), e.getMessage());
                } catch (NullPointerException e) {//should not happen
                    record.addError(ProcessError.CONFIG_SETTING_ERROR.getName(), e.getMessage());
                }
            }
        };
    }

    /**
     * Builder for the {@code typetimehash} strategy: the id is
     * {@code <record type>-<record time>-<hex digest of the fields>}. The digest is rendered
     * exactly as in the {@code hashFields} strategy. A record lacking its type or time field
     * gets an error and keeps its id.
     */
    private static IdBuilder newTypeTimeHashBuilder(final List<String> fieldNames, final MessageDigest digest,
            final Charset charset) {
        return new IdBuilder() {
            @Override
            public void buildId(Record record) {
                if (record.getField(FieldDictionary.RECORD_TYPE) == null
                        || record.getField(FieldDictionary.RECORD_TIME) == null) {
                    record.addError(ProcessError.CONFIG_SETTING_ERROR.getName(),
                            String.format("could not build id because field '%s' or field '%s' does not exist",
                                    FieldDictionary.RECORD_TYPE, FieldDictionary.RECORD_TIME));
                    return;
                }
                final String hashString = hexDigestOfFields(record, fieldNames, digest, charset);
                final String recordType = record.getField(FieldDictionary.RECORD_TYPE).asString();
                final String recordTime = record.getField(FieldDictionary.RECORD_TIME).asString();
                record.setId(String.format("%s-%s-%s", recordType, recordTime, hashString));
            }
        };
    }

    /**
     * Strategy that computes and sets the id of one record.
     */
    interface IdBuilder {
        /**
         * Sets the new id on {@code record}, or records an error on it when the id cannot be built.
         */
        void buildId(Record record);
    }
}