org.apache.metron.dataloads.bulk.ThreatIntelBulkLoader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.metron.dataloads.bulk.ThreatIntelBulkLoader.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.dataloads.bulk;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.io.Files;
import org.apache.commons.cli.*;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.metron.dataloads.extractor.ExtractorHandler;
import org.apache.metron.dataloads.hbase.mr.BulkLoadMapper;
import org.apache.metron.common.configuration.enrichment.SensorEnrichmentUpdateConfig;
import org.apache.metron.enrichment.converter.HbaseConverter;
import org.apache.metron.enrichment.converter.EnrichmentConverter;
import org.apache.metron.common.utils.JSONUtils;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.text.*;
import java.util.Date;

/**
 * Command-line driver that configures and runs a map-only MapReduce job which feeds input data
 * through {@link BulkLoadMapper} and writes the result to an HBase table via
 * {@link TableOutputFormat}.
 *
 * <p>The supported command-line switches are declared by {@link BulkLoadOptions}.
 */
public class ThreatIntelBulkLoader {
    /**
     * Charset used to decode the extractor configuration file. The file is a JSON document, so it
     * is read as UTF-8 rather than with the platform default, which varies between hosts.
     */
    private static final Charset CONFIG_CHARSET = Charset.forName("UTF-8");

    /** Factory that turns an option's short code into its fully configured {@link Option}. */
    private abstract static class OptionHandler implements Function<String, Option> {
    }

    /**
     * The command-line options understood by the loader. Each constant owns its short code and
     * the commons-cli {@link Option} built from it.
     */
    public enum BulkLoadOptions {
        HELP("h", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                return new Option(s, "help", false, "Generate Help screen");
            }
        }),
        TABLE("t", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "table", true, "HBase table to import data into");
                o.setRequired(true);
                o.setArgName("HBASE_TABLE");
                return o;
            }
        }),
        COLUMN_FAMILY("f", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "column_family", true, "Column family of the HBase table to import into");
                o.setRequired(true);
                o.setArgName("CF_NAME");
                return o;
            }
        }),
        EXTRACTOR_CONFIG("e", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "extractor_config", true,
                        "JSON Document describing the extractor for this input data source");
                o.setArgName("JSON_FILE");
                o.setRequired(true);
                return o;
            }
        }),
        INPUT_DATA("i", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "input", true,
                        "Input directory in HDFS for the data to import into HBase");
                o.setArgName("DIR");
                o.setRequired(true);
                return o;
            }
        }),
        AS_OF_TIME("a", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "as_of", true,
                        "The last read timestamp to mark the records with (omit for time of execution)");
                o.setArgName("datetime");
                o.setRequired(false);
                return o;
            }
        }),
        AS_OF_TIME_FORMAT("z", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "as_of_format", true,
                        "The format of the as_of time (only used in conjunction with the as_of option)");
                o.setArgName("format");
                o.setRequired(false);
                return o;
            }
        }),
        CONVERTER("c", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "converter", true,
                        "The HBase converter class to use (Default is threat intel)");
                o.setArgName("class");
                o.setRequired(false);
                return o;
            }
        }),
        ENRICHMENT_CONFIG("n", new OptionHandler() {
            @Nullable
            @Override
            public Option apply(@Nullable String s) {
                Option o = new Option(s, "enrichment_config", true,
                        "JSON Document describing the enrichment configuration details."
                                + "  This is used to associate an enrichment type with a field type in zookeeper.");
                o.setArgName("JSON_FILE");
                o.setRequired(false);
                return o;
            }
        });

        /** The commons-cli option produced by this constant's handler. */
        Option option;
        /** The single-letter switch (without the leading dash) identifying this option. */
        String shortCode;

        BulkLoadOptions(String shortCode, OptionHandler optionHandler) {
            this.shortCode = shortCode;
            this.option = optionHandler.apply(shortCode);
        }

        /**
         * @param cli the parsed command line
         * @return {@code true} if this option was supplied
         */
        public boolean has(CommandLine cli) {
            return cli.hasOption(shortCode);
        }

        /**
         * @param cli the parsed command line
         * @return the value supplied for this option, as reported by
         *         {@link CommandLine#getOptionValue(String)}
         */
        public String get(CommandLine cli) {
            return cli.getOptionValue(shortCode);
        }

        /**
         * Parses the arguments against {@link #getOptions()}.
         *
         * <p>If help is requested, the usage text is printed and the JVM exits with status 0.
         * If the arguments cannot be parsed, the error and the usage text are printed and the
         * JVM exits with status -1.
         *
         * @param parser the commons-cli parser to use
         * @param args   the arguments to parse
         * @return the parsed command line
         */
        public static CommandLine parse(CommandLineParser parser, String[] args) {
            // Several options are required, so parsing "-h" on its own fails with a
            // missing-option error before the HELP check below is ever reached.
            // Detect the help request up front so that it works without the required options.
            if (isHelpRequested(args)) {
                printHelp();
                System.exit(0);
            }
            try {
                CommandLine cli = parser.parse(getOptions(), args);
                if (HELP.has(cli)) {
                    printHelp();
                    System.exit(0);
                }
                return cli;
            } catch (ParseException e) {
                System.err.println("Unable to parse args: " + Joiner.on(' ').join(args));
                e.printStackTrace(System.err);
                printHelp();
                System.exit(-1);
                // Unreachable in practice; present only so that every path returns a value.
                return null;
            }
        }

        /**
         * Scans the raw arguments for the help switch in either its short or long form.
         * Scanning stops at a bare {@code --}, which conventionally ends the option list.
         */
        private static boolean isHelpRequested(String[] args) {
            String shortFlag = "-" + HELP.shortCode;
            String longFlag = "--" + HELP.option.getLongOpt();
            for (String arg : args) {
                if ("--".equals(arg)) {
                    return false;
                }
                if (shortFlag.equals(arg) || longFlag.equals(arg)) {
                    return true;
                }
            }
            return false;
        }

        /** Prints the usage text for all options to standard output. */
        public static void printHelp() {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("ThreatIntelBulkLoader", getOptions());
        }

        /**
         * @return a new {@link Options} holding the option of every enum constant
         */
        public static Options getOptions() {
            Options ret = new Options();
            for (BulkLoadOptions o : BulkLoadOptions.values()) {
                ret.addOption(o.option);
            }
            return ret;
        }
    }

    /**
     * Determines the timestamp to hand to the job.
     *
     * @param cli the parsed command line
     * @return the {@code as_of} value parsed with the {@code as_of_format} pattern, in epoch
     *         milliseconds, or the current time when {@code as_of} was not supplied
     * @throws java.text.ParseException if the {@code as_of} value does not match the format
     * @throws IllegalStateException    if {@code as_of} is supplied without {@code as_of_format}
     */
    private static long getTimestamp(CommandLine cli) throws java.text.ParseException {
        if (!BulkLoadOptions.AS_OF_TIME.has(cli)) {
            return System.currentTimeMillis();
        }
        if (!BulkLoadOptions.AS_OF_TIME_FORMAT.has(cli)) {
            throw new IllegalStateException(
                    "Unable to proceed: Specified as_of_time without an associated format.");
        }
        DateFormat format = new SimpleDateFormat(BulkLoadOptions.AS_OF_TIME_FORMAT.get(cli));
        Date asOf = format.parse(BulkLoadOptions.AS_OF_TIME.get(cli));
        return asOf.getTime();
    }

    /**
     * Reads the extractor configuration file into a single string.
     *
     * @param configFile the file to read
     * @return the file's lines joined with {@code "\n"}
     * @throws IOException if the file cannot be read
     */
    private static String readExtractorConfig(File configFile) throws IOException {
        return Joiner.on("\n").join(Files.readLines(configFile, CONFIG_CHARSET));
    }

    /**
     * Builds the map-only job that loads {@code input} into {@code table}.
     *
     * @param conf                    base configuration for the job
     * @param input                   input path handed to the extractor's input format handler
     * @param table                   name of the HBase output table
     * @param cf                      column family passed to the mapper
     * @param extractorConfigContents extractor configuration, passed to the mapper and used here
     *                                to select the input format
     * @param ts                      timestamp passed to the mapper under
     *                                {@link BulkLoadMapper#LAST_SEEN_KEY}
     * @param converter               converter whose class name is passed to the mapper
     * @return the configured, not yet submitted, job
     * @throws IOException if the job or the extractor handler cannot be created
     */
    public static Job createJob(Configuration conf, String input, String table, String cf,
            String extractorConfigContents, long ts, HbaseConverter converter) throws IOException {
        // Job.getInstance replaces the deprecated Job(Configuration) constructor.
        Job job = Job.getInstance(conf);
        job.setJobName("ThreatIntelBulkLoader: " + input + " => " + table + ":" + cf);
        System.out.println("Configuring " + job.getJobName());
        job.setJarByClass(ThreatIntelBulkLoader.class);
        job.setMapperClass(BulkLoadMapper.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        // Settings must go on the job's own configuration, which is what the tasks will see.
        Configuration jobConf = job.getConfiguration();
        jobConf.set(TableOutputFormat.OUTPUT_TABLE, table);
        jobConf.set(BulkLoadMapper.COLUMN_FAMILY_KEY, cf);
        jobConf.set(BulkLoadMapper.CONFIG_KEY, extractorConfigContents);
        jobConf.set(BulkLoadMapper.LAST_SEEN_KEY, String.valueOf(ts));
        jobConf.set(BulkLoadMapper.CONVERTER_KEY, converter.getClass().getName());

        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        // Map-only: the mapper output goes straight to the output format.
        job.setNumReduceTasks(0);

        ExtractorHandler handler = ExtractorHandler.load(extractorConfigContents);
        handler.getInputFormatHandler().set(job, new Path(input), handler.getConfig());
        return job;
    }

    /**
     * Entry point: parses the arguments, runs the load job and, when an enrichment configuration
     * was supplied and the job succeeded, calls {@code updateSensorConfigs()} on it.
     *
     * <p>Exits with status 1 if the job fails and 0 otherwise.
     *
     * @param argv generic Hadoop options followed by the options in {@link BulkLoadOptions}
     * @throws Exception on any configuration, reflection or job failure
     */
    public static void main(String... argv) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        String[] otherArgs = new GenericOptionsParser(conf, argv).getRemainingArgs();

        CommandLine cli = BulkLoadOptions.parse(new PosixParser(), otherArgs);
        long ts = getTimestamp(cli);
        String input = BulkLoadOptions.INPUT_DATA.get(cli);
        String table = BulkLoadOptions.TABLE.get(cli);
        String cf = BulkLoadOptions.COLUMN_FAMILY.get(cli);
        String extractorConfigContents = readExtractorConfig(new File(BulkLoadOptions.EXTRACTOR_CONFIG.get(cli)));

        String converterClass = BulkLoadOptions.CONVERTER.has(cli)
                ? BulkLoadOptions.CONVERTER.get(cli)
                : EnrichmentConverter.class.getName();

        // Load the enrichment config before the job runs so a bad file fails fast.
        SensorEnrichmentUpdateConfig sensorEnrichmentUpdateConfig = null;
        if (BulkLoadOptions.ENRICHMENT_CONFIG.has(cli)) {
            sensorEnrichmentUpdateConfig = JSONUtils.INSTANCE
                    .load(new File(BulkLoadOptions.ENRICHMENT_CONFIG.get(cli)), SensorEnrichmentUpdateConfig.class);
        }

        // asSubclass rejects a wrong class before it is instantiated; getDeclaredConstructor()
        // replaces the deprecated Class.newInstance().
        HbaseConverter converter = Class.forName(converterClass)
                .asSubclass(HbaseConverter.class)
                .getDeclaredConstructor()
                .newInstance();

        Job job = createJob(conf, input, table, cf, extractorConfigContents, ts, converter);
        System.out.println(conf);
        boolean jobRet = job.waitForCompletion(true);
        if (!jobRet) {
            System.exit(1);
        }
        if (sensorEnrichmentUpdateConfig != null) {
            sensorEnrichmentUpdateConfig.updateSensorConfigs();
        }
        System.exit(0);
    }
}