org.graylog.benchmarks.pipeline.PipelinePerformanceBenchmarks.java Source code

Introduction

Here is the source code for org.graylog.benchmarks.pipeline.PipelinePerformanceBenchmarks.java, a JMH benchmark harness for the Graylog Pipeline Processor. It wires up an in-memory Graylog environment with Guice, loads rule, pipeline and message fixtures from a benchmark directory, and measures how quickly the PipelineInterpreter processes messages.

Source

/**
 * This file is part of Graylog Pipeline Processor.
 *
 * Graylog Pipeline Processor is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Graylog Pipeline Processor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Graylog Pipeline Processor.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.graylog.benchmarks.pipeline;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.MapMaker;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.MultimapBuilder;
import com.google.common.io.LineProcessor;
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.assistedinject.FactoryModuleBuilder;
import com.google.inject.name.Names;

import au.com.bytecode.opencsv.CSVParser;

import com.codahale.metrics.ConsoleReporter;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.eaio.uuid.UUID;
import com.moandjiezana.toml.Toml;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.ParseException;
import org.graylog.plugins.pipelineprocessor.ast.Pipeline;
import org.graylog.plugins.pipelineprocessor.ast.Rule;
import org.graylog.plugins.pipelineprocessor.db.PipelineDao;
import org.graylog.plugins.pipelineprocessor.db.PipelineService;
import org.graylog.plugins.pipelineprocessor.db.PipelineStreamConnectionsService;
import org.graylog.plugins.pipelineprocessor.db.RuleDao;
import org.graylog.plugins.pipelineprocessor.db.RuleService;
import org.graylog.plugins.pipelineprocessor.db.memory.InMemoryServicesModule;
import org.graylog.plugins.pipelineprocessor.functions.ProcessorFunctionsModule;
import org.graylog.plugins.pipelineprocessor.parser.PipelineRuleParser;
import org.graylog.plugins.pipelineprocessor.processors.ConfigurationStateUpdater;
import org.graylog.plugins.pipelineprocessor.processors.PipelineInterpreter;
import org.graylog.plugins.pipelineprocessor.rest.PipelineConnections;
import org.graylog2.database.NotFoundException;
import org.graylog2.grok.GrokPatternService;
import org.graylog2.grok.InMemoryGrokPatternService;
import org.graylog2.plugin.Message;
import org.graylog2.plugin.Tools;
import org.graylog2.plugin.alarms.AlertCondition;
import org.graylog2.plugin.database.Persisted;
import org.graylog2.plugin.database.ValidationException;
import org.graylog2.plugin.database.validators.ValidationResult;
import org.graylog2.plugin.database.validators.Validator;
import org.graylog2.plugin.streams.Output;
import org.graylog2.plugin.streams.Stream;
import org.graylog2.plugin.streams.StreamRule;
import org.graylog2.rest.resources.streams.requests.CreateStreamRequest;
import org.graylog2.shared.bindings.SchedulerBindings;
import org.graylog2.shared.bindings.providers.MetricRegistryProvider;
import org.graylog2.shared.journal.Journal;
import org.graylog2.shared.journal.NoopJournal;
import org.graylog2.streams.StreamImpl;
import org.graylog2.streams.StreamService;
import org.joda.time.DateTime;
import org.jooq.lambda.Seq;
import org.jooq.lambda.tuple.Tuple2;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.infra.BenchmarkParams;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.infra.IterationParams;
import org.openjdk.jmh.profile.GCProfiler;
import org.openjdk.jmh.profile.InternalProfiler;
import org.openjdk.jmh.results.AggregationPolicy;
import org.openjdk.jmh.results.IterationResult;
import org.openjdk.jmh.results.Result;
import org.openjdk.jmh.results.RunResult;
import org.openjdk.jmh.results.ScalarResult;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.stream.Collectors;

import javax.annotation.Nonnull;

import static com.google.common.collect.Iterables.getOnlyElement;

public class PipelinePerformanceBenchmarks {
    private static final Logger LOG = LoggerFactory.getLogger(PipelinePerformanceBenchmarks.class);
    public static final Message MESSAGE = new Message("hallo welt", "127.0.0.1", Tools.nowUTC());

    private static String benchmarkDir = System.getProperty("benchmarkDir", "benchmarks");

    private static MetricRegistry metricRegistry;

    @State(Scope.Benchmark)
    public static class PipelineConfig {

        // the parameter values (benchmark directory names) are supplied dynamically by main() via OptionsBuilder.param("directoryName", ...)
        @Param({})
        private String directoryName;

        @Param({ "false", "true" })
        private String codeGenerator;

        @Param({ "false", "true" })
        private String cachedStageIterators;

        private PipelineInterpreter interpreter;
        private BenchmarkConfig config;
        private Injector injector;
        private Iterator<Message> messageCycler;
        // enable when using yourkit for single runs
        //        private Controller controller;

        @Setup
        public void setup() throws Exception {

            // enable when using yourkit for single runs
            //            controller = new Controller();
            //            controller.startCPUTracing(null);
            //            controller.startAllocationRecording(null);
            //            controller.enableStackTelemetry();

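            // wire up a minimal in-memory Graylog environment: no-op journal, dummy stream service,
            // in-memory grok patterns and the pipeline processor function bindings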
            injector = Guice.createInjector(new ProcessorFunctionsModule(), new SchedulerBindings(),
                    new InMemoryServicesModule(), new AbstractModule() {
                        @Override
                        protected void configure() {
                            bind(Journal.class).to(NoopJournal.class).asEagerSingleton();
                            bind(StreamService.class).toInstance(new DummyStreamService());
                            bind(GrokPatternService.class).to(InMemoryGrokPatternService.class);
                            bind(MetricRegistry.class).toProvider(MetricRegistryProvider.class);
                            bindConstant().annotatedWith(Names.named("processbuffer_processors")).to(1);
                            bindConstant().annotatedWith(Names.named("cached_stageiterators"))
                                    .to(Boolean.valueOf(cachedStageIterators));
                            install(new FactoryModuleBuilder().build(PipelineInterpreter.State.Factory.class));
                        }
                    });

            // resolve types of benchmark configuration, to be loaded into the various services.
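            // Each benchmark lives in its own sub-directory of the benchmark directory; the names below
            // are illustrative, only the extensions and "benchmark.toml" are significant:
            //   <benchmarkDir>/<directoryName>/
            //       benchmark.toml   global config (required, see BenchmarkConfig below)
            //       *.rule           pipeline rule sources
            //       *.pipeline       pipeline definitions
            //       *.csv            optional sample messages, first row holds the field names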
            Path path = Paths.get(benchmarkDir);
            Multimap<Type, File> configFiles = MultimapBuilder.enumKeys(Type.class).arrayListValues().build();
            Files.list(path.resolve(directoryName)).map(Path::toFile).filter(File::isFile).forEach(inputFile -> {
                final String name = inputFile.getName();
                if (name.endsWith(".rule")) {
                    configFiles.put(Type.RULE, inputFile);
                } else if (name.endsWith(".pipeline")) {
                    configFiles.put(Type.PIPELINE, inputFile);
                } else if (name.equals("benchmark.toml")) {
                    configFiles.put(Type.CONFIG, inputFile);
                } else if (name.endsWith(".csv")) {
                    configFiles.put(Type.MESSAGES, inputFile);
                } else {
                    LOG.warn("unrecognized file {} found, it will be ignored.", inputFile);
                }
            });

            if (configFiles.containsKey(Type.CONFIG)) {
                config = new Toml().read(getOnlyElement(configFiles.get(Type.CONFIG))).to(BenchmarkConfig.class);
            } else {
                LOG.error("The benchmark directory must include a benchmark.toml file! Aborting.");
                System.exit(-1);
            }
            final PipelineRuleParser parser = injector.getInstance(PipelineRuleParser.class);
            final RuleService ruleService = injector.getInstance(RuleService.class);

            configFiles.get(Type.RULE).forEach(file -> {
                final String ruleText = readFile(file);
                if (ruleText == null) {
                    return;
                }
                final Rule rule = parser.parseRule(ruleText, true);
                final DateTime now = Tools.nowUTC();
                final RuleDao saved = ruleService.save(RuleDao.create(null, rule.name(), null, ruleText, now, now));
                LOG.debug("Read and saved rule {} with Id {}", saved.title(), saved.id());
            });

            final PipelineService pipelineService = injector.getInstance(PipelineService.class);
            configFiles.get(Type.PIPELINE).forEach(file -> {
                final String pipelineText = readFile(file);
                if (pipelineText == null) {
                    return;
                }
                final Pipeline pipeline = parser.parsePipeline(null, pipelineText);
                final DateTime now = Tools.nowUTC();
                final PipelineDao saved = pipelineService
                        .save(PipelineDao.create(null, pipeline.name(), null, pipelineText, now, now));
                LOG.debug("Read and saved pipeline {} with Id {}", saved.title(), saved.id());
            });
            final ImmutableMap<String, PipelineDao> pipelineTitleIndex = Maps.uniqueIndex(pipelineService.loadAll(),
                    PipelineDao::title);
            final PipelineStreamConnectionsService connectionsService = injector
                    .getInstance(PipelineStreamConnectionsService.class);
            final StreamService streamService = injector.getInstance(StreamService.class);
            if (config.streams == null || config.streams.isEmpty()) {
                LOG.info("No streams defined, this benchmark won't match any messages!");
            } else {
                for (BenchmarkConfig.StreamDescription streamDescription : config.streams) {
                    final Stream stream = streamService.create(Collections.emptyMap());
                    stream.setTitle(streamDescription.name);
                    if (streamDescription.name.equals("default")) {
                        stream.setDefaultStream(true);
                    }
                    stream.setDescription(streamDescription.description);
                    stream.setDisabled(false);
                    final String id = streamService.save(stream);

                    // TODO default stream handling is really wonky now.
                    connectionsService
                            .save(PipelineConnections.create(null, stream.isDefaultStream() ? "default" : id,
                                    streamDescription.pipelines.stream().map(pipelineTitleIndex::get)
                                            .map(PipelineDao::id).collect(Collectors.toSet())));
                }
            }

            final List<Message> loadedMessages = Lists.newArrayList();
            configFiles.get(Type.MESSAGES).forEach(file -> {
                try {
                    loadedMessages.addAll(com.google.common.io.Files.readLines(file, StandardCharsets.UTF_8,
                            new CsvMessageFileProcessor()));
                } catch (IOException e) {
                    System.err.println(e.getMessage());
                    System.exit(-3);
                }
            });
            if (!loadedMessages.isEmpty()) {
                messageCycler = Iterators.cycle(loadedMessages);
            }

            if (!configFiles.containsKey(Type.MESSAGES)) {
                if ("generate".equalsIgnoreCase(config.messages)) {
                    final ArrayList<Message> objects = Lists.newArrayList();
                    Seq.range(0, 25000)
                            .forEach(i -> objects.add(new Message("hallo welt", "127.0.0.1", Tools.nowUTC())));
                    messageCycler = Iterators.cycle(objects);
                } else {
                    messageCycler = Iterators.cycle(MESSAGE);
                }
            }
            final MetricRegistry metrics = injector.getInstance(MetricRegistry.class);
            // make the registry available to the profiler
            metricRegistry = metrics;

            // toggle code generation
            ConfigurationStateUpdater.setAllowCodeGeneration(Boolean.valueOf(codeGenerator));
            interpreter = injector.getInstance(PipelineInterpreter.class);
        }

        @TearDown
        public void dumpMetrics() throws Exception {

            // enable when using yourkit for single runs
            //            if (controller != null) {
            //                controller.captureSnapshot(Controller.SNAPSHOT_WITH_HEAP);
            //            }
            final MetricRegistry metrics = injector.getInstance(MetricRegistry.class);
            // make the registry available to the profiler
            metricRegistry = metrics;
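            // dump the complete metric report for this run to /tmp/bench-<directoryName>.txt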
            final ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics)
                    .outputTo(new PrintStream("/tmp/bench-" + directoryName + ".txt")).build();
            reporter.report();
        }

        private String readFile(File file) {
            try {
                return com.google.common.io.Files.toString(file, StandardCharsets.UTF_8);
            } catch (IOException e) {
                LOG.error("Cannot read file, skipping it. This will likely fail the benchmark.", e);
                return null;
            }
        }

        /**
         * Dummy stream service that only allows setting and getting stream definitions, but no rules, alert conditions, receivers or outputs.
         */
        private static class DummyStreamService implements StreamService {

            private final Map<String, Stream> store = new MapMaker().makeMap();

            @Override
            public Stream create(Map<String, Object> fields) {
                return new StreamImpl(fields);
            }

            @Override
            public Stream create(CreateStreamRequest cr, String userId) {
                Map<String, Object> streamData = Maps.newHashMap();
                streamData.put(StreamImpl.FIELD_TITLE, cr.title());
                streamData.put(StreamImpl.FIELD_DESCRIPTION, cr.description());
                streamData.put(StreamImpl.FIELD_CREATOR_USER_ID, userId);
                streamData.put(StreamImpl.FIELD_CREATED_AT, Tools.nowUTC());
                streamData.put(StreamImpl.FIELD_CONTENT_PACK, cr.contentPack());
                streamData.put(StreamImpl.FIELD_MATCHING_TYPE, cr.matchingType().toString());

                return create(streamData);
            }

            @Override
            public Stream load(String id) throws NotFoundException {
                final Stream stream = store.get(id);
                if (stream == null) {
                    throw new NotFoundException();
                }
                return stream;
            }

            @Override
            public void destroy(Stream stream) throws NotFoundException {
                if (store.remove(stream.getId()) == null) {
                    throw new NotFoundException();
                }
            }

            @Override
            public List<Stream> loadAll() {
                return ImmutableList.copyOf(store.values());
            }

            @Override
            public List<Stream> loadAllEnabled() {
                return store.values().stream().filter(stream -> !stream.getDisabled()).collect(Collectors.toList());
            }

            @Override
            public long count() {
                return store.size();
            }

            @Override
            public void pause(Stream stream) throws ValidationException {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void resume(Stream stream) throws ValidationException {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public List<StreamRule> getStreamRules(Stream stream) throws NotFoundException {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public List<Stream> loadAllWithConfiguredAlertConditions() {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public List<AlertCondition> getAlertConditions(Stream stream) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public AlertCondition getAlertCondition(Stream stream, String conditionId) throws NotFoundException {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void addAlertCondition(Stream stream, AlertCondition condition) throws ValidationException {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void updateAlertCondition(Stream stream, AlertCondition condition) throws ValidationException {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void removeAlertCondition(Stream stream, String conditionId) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void addAlertReceiver(Stream stream, String type, String name) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void removeAlertReceiver(Stream stream, String type, String name) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void addOutput(Stream stream, Output output) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void removeOutput(Stream stream, Output output) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public void removeOutputFromAllStreams(Output output) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public <T extends Persisted> int destroy(T model) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public <T extends Persisted> int destroyAll(Class<T> modelClass) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public <T extends Persisted> String save(T model) throws ValidationException {
                store.put(model.getId(), (Stream) model);
                return model.getId();
            }

            @Override
            public <T extends Persisted> String saveWithoutValidation(T model) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public <T extends Persisted> Map<String, List<ValidationResult>> validate(T model,
                    Map<String, Object> fields) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public <T extends Persisted> Map<String, List<ValidationResult>> validate(T model) {
                throw new IllegalStateException("not implemented");
            }

            @Override
            public Map<String, List<ValidationResult>> validate(Map<String, Validator> validators,
                    Map<String, Object> fields) {
                throw new IllegalStateException("not implemented");
            }
        }

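        /**
         * Turns a CSV file into messages: the first row supplies the field names and every
         * following row becomes one Message with those fields plus a generated message id.
         * Illustrative input (column names and values are made up):
         *   source,facility,short_message
         *   127.0.0.1,nginx,GET /index.html
         */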
        private static class CsvMessageFileProcessor implements LineProcessor<List<Message>> {
            String[] fieldNames;
            private CSVParser csvParser = new CSVParser();
            private List<Message> messages = Lists.newArrayList();
            boolean firstLine = true;

            @Override
            public boolean processLine(@Nonnull String line) throws IOException {
                final String[] strings = csvParser.parseLine(line);
                if (strings == null) {
                    return false;
                }
                if (firstLine) {
                    fieldNames = strings;
                    firstLine = false;
                    return true;
                }

                final Map<String, Object> fields = Seq.of(fieldNames).zipWithIndex()
                        .map(nameAndIndex -> nameAndIndex.map2(index -> strings[Math.toIntExact(index)]))
                        .collect(Collectors.toMap(Tuple2::v1, Tuple2::v2));
                fields.put(Message.FIELD_ID, new UUID().toString());
                messages.add(new Message(fields));
                return true;
            }

            @Override
            public List<Message> getResult() {
                return messages;
            }
        }

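        /**
         * Shape of benchmark.toml. An illustrative example (all values are made up; pipeline
         * entries must match pipeline titles, a stream named "default" becomes the default stream,
         * and messages = "generate" produces synthetic messages when no CSV file is present):
         *
         *   name = "example-benchmark"
         *   messages = "generate"
         *
         *   [[streams]]
         *   name = "default"
         *   description = "all messages"
         *   pipelines = ["my pipeline"]
         */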
        @SuppressWarnings({ "unused", "MismatchedQueryAndUpdateOfCollection" })
        private class BenchmarkConfig {
            private String name;

            private List<StreamDescription> streams;

            private String messages;

            private class StreamDescription {
                private String name;
                private String description;
                private Set<String> pipelines;
            }
        }

        /**
         * type of configuration file: global config (name, streams, connections), a rule source file, a pipeline source file, or a CSV file with sample messages
         */
        private enum Type {
            CONFIG, RULE, MESSAGES, PIPELINE
        }
    }

    @Benchmark
    public void runPipeline(PipelineConfig config, Blackhole bh) {
        // cycle endlessly over the prepared messages
        bh.consume(config.interpreter.process(config.messageCycler.next()));
    }

    public static void main(String[] args) throws RunnerException, URISyntaxException, IOException {

        final org.apache.commons.cli.Options options = new org.apache.commons.cli.Options();
        options.addOption(Option.builder("b").hasArg(true).argName("directory").longOpt("benchmarks")
                .desc("Benchmark directory (default: 'benchmarks')").required(false).build());
        options.addOption("f", true,
                "Number of forks (default 1). Set to 0 to allow attaching a debugger/profiler");
        options.addOption(Option.builder("n").longOpt("name").desc("Only run benchmark with the given name")
                .required(false).argName("name").hasArg(true).build());
        options.addOption("h", "help");
        options.addOption("w", true, "Warmup iterations (default 5)");
        options.addOption("i", true, "Iterations (default 20)");
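        // Illustrative invocation (option names as defined above; the benchmark name is hypothetical):
        //   benchmark.sh -b benchmarks -n example-benchmark -f 1 -w 5 -i 20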

        String[] benchmarkParams = {};
        String benchmarkDir = "benchmarks";
        int forks = 1;
        int warmupIterations = 5;
        int iterations = 20;
        try {
            CommandLine line = new DefaultParser().parse(options, args);

            if (line.hasOption('h')) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("benchmark.sh", options);
                return;
            }

            benchmarkDir = line.getOptionValue('b', "benchmarks");
            benchmarkParams = loadBenchmarkNames(benchmarkDir).toArray(new String[] {});

            if (line.hasOption('n')) {
                benchmarkParams = new String[] { line.getOptionValue('n') };
            }

            if (line.hasOption('f')) {
                forks = Integer.parseInt(line.getOptionValue('f', "1"));
            }

            if (line.hasOption('w')) {
                warmupIterations = Integer.parseInt(line.getOptionValue('w', "5"));
            }
            if (line.hasOption('i')) {
                iterations = Integer.parseInt(line.getOptionValue('i', "20"));
            }
        } catch (ParseException e) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("benchmark.sh", options);
            System.exit(-2);
        }

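        // JMH run plan: 5 s warmup rounds, 60 s measurement rounds, one thread, JSON result file,
        // plus the GC profiler and the MetricsProfiler defined below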
        Options opt = new OptionsBuilder().include(PipelinePerformanceBenchmarks.class.getSimpleName())
                .warmupIterations(warmupIterations).warmupTime(TimeValue.seconds(5))
                .measurementIterations(iterations).measurementTime(TimeValue.seconds(60)).detectJvmArgs().threads(1)
                .forks(forks).param("directoryName", benchmarkParams)
                //                .param("codeGenerator", "false")
                //                .param("cachedStageIterators", "false")
                .jvmArgsAppend("-DbenchmarkDir=" + benchmarkDir).resultFormat(ResultFormatType.JSON)
                .addProfiler(GCProfiler.class).addProfiler(MetricsProfiler.class).build();

        final Runner runner = new Runner(opt);
        final Collection<RunResult> results = runner.run();
    }

    private static List<String> loadBenchmarkNames(String benchmarkDir) throws URISyntaxException, IOException {
        Path benchmarksPath = Paths.get(benchmarkDir);

        return Files.list(benchmarksPath).map(Path::toFile).filter(file -> !file.isHidden()).map(File::getName)
                .sorted().collect(Collectors.toList());
    }

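    /**
     * JMH profiler that reads the per-rule and per-pipeline meters from the shared MetricRegistry
     * after each iteration and attaches their counts to the iteration result.
     */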
    public static class MetricsProfiler implements InternalProfiler {

        @Override
        public String getDescription() {
            return "Metrics profile via MetricRegistry";
        }

        @Override
        public void beforeIteration(BenchmarkParams benchmarkParams, IterationParams iterationParams) {

        }

        @Override
        public Collection<? extends Result> afterIteration(BenchmarkParams benchmarkParams,
                IterationParams iterationParams, IterationResult result) {
            final ArrayList<Result> results = Lists.newArrayList();
            if (metricRegistry == null) {
                return results;
            }
            final SortedMap<String, Meter> counters = metricRegistry.getMeters((name, metric) -> {
                return name.startsWith(MetricRegistry.name(Rule.class))
                        || name.startsWith(MetricRegistry.name(Pipeline.class));
            });
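            // the scalar results are attached to the IterationResult directly; the returned list stays empty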
            counters.entrySet()
                    .forEach(stringCounterEntry -> result.addResult(new ScalarResult(stringCounterEntry.getKey(),
                            stringCounterEntry.getValue().getCount(), "calls", AggregationPolicy.SUM)));

            return results;
        }
    }
}