org.opennms.newts.gsod.ImportRunner.java Source code

Java tutorial

Introduction

Here is the source code for org.opennms.newts.gsod.ImportRunner.java

Source

/*
 * Copyright 2014, The OpenNMS Group
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 *     
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.opennms.newts.gsod;

import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.opennms.newts.gsod.FileObservable.fileTreeWalker;
import static org.opennms.newts.gsod.FileObservable.lines;
import static rx.exceptions.Exceptions.propagate;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.http.entity.ContentType;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.impl.nio.client.HttpAsyncClients;
import org.apache.http.nio.client.methods.HttpAsyncMethods;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.opennms.newts.api.MetricType;
import org.opennms.newts.api.Sample;
import org.opennms.newts.api.SampleRepository;
import org.opennms.newts.api.Timestamp;
import org.opennms.newts.reporter.metrics.NewtsReporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import rx.Observable;
import rx.Observable.Operator;
import rx.Observer;
import rx.Subscriber;
import rx.Subscription;
import rx.apache.http.ObservableHttp;
import rx.apache.http.ObservableHttpResponse;
import rx.exceptions.Exceptions;
import rx.functions.Action0;
import rx.functions.Func1;
import rx.functions.Functions;
import rx.schedulers.Schedulers;

import com.codahale.metrics.ConsoleReporter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.codahale.metrics.Timer.Context;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.inject.Guice;
import com.google.inject.Injector;

public class ImportRunner {

    // Number of samples grouped into one batch before it is posted (option -n).
    private int m_samplesPerBatch = 1000;
    // Directory that is walked for GSOD data files (positional argument).
    private File m_source;
    // REST endpoint to post to; null selects the direct SampleRepository path (option -u).
    private String m_restUrl = null;
    // Created on first call to repository(); null until then.
    private SampleRepository m_repository;
    // Thread count for the direct poster; 1 means inserts are mapped inline (option -p).
    private int m_threadCount = 1;
    // Capacity of the direct poster's work queue; 0 means "use m_threadCount * 3" (option -q).
    private int m_maxThreadQueueSize = 0;
    // Divisor applied to each sample's epoch-millisecond timestamp (option -f).
    private double m_timescaleFactor = 1.0;
    // Offset in milliseconds added to each scaled timestamp (option -o).
    private long m_timeoffset = 0;

    /**
     * Rejects an invalid option value.
     *
     * @param check the condition that must hold
     * @param failureMessage message of the exception raised when {@code check} is false
     * @throws IllegalArgumentException if {@code check} is false
     */
    private void checkArgument(boolean check, String failureMessage) {
        if (check) {
            return;
        }
        throw new IllegalArgumentException(failureMessage);
    }

    /**
     * Sets how many samples are grouped into one batch.
     *
     * @param samplesPerBatch the batch size; must be at least 1
     * @throws IllegalArgumentException if {@code samplesPerBatch} is not positive
     */
    @Option(name = "-n", aliases = "--samples-per-batch", metaVar = "sample-count", usage = "the maxinum number of samples to include in each post to the repository (default: 1000)")
    public void setSamplesPerBatch(int samplesPerBatch) {
        final boolean positive = samplesPerBatch >= 1;
        checkArgument(positive, "samples per batch must be greater than zero!");
        this.m_samplesPerBatch = samplesPerBatch;
    }

    /**
     * Selects REST publishing by recording the server URL to post to.
     *
     * @param url the REST endpoint; must be neither null nor empty
     * @throws IllegalArgumentException if {@code url} is null or empty
     */
    @Option(name = "-u", aliases = "--url", metaVar = "url", usage = "publish data via a Newts REST server at the given url (default: use direct access via Newts API)")
    public void setURL(String url) {
        final boolean missing = (url == null) || url.isEmpty();
        checkArgument(!missing, "the url must not be empty");
        this.m_restUrl = url;
    }

    /**
     * Sets the number of threads used by the direct poster.
     *
     * @param threadCount the pool size; must be at least 1
     * @throws IllegalArgumentException if {@code threadCount} is not positive
     */
    @Option(name = "-p", aliases = "--parallelism", metaVar = "thread-count", usage = "when using direct the size of the thread pool that posts the results.  (defaults to 1 ie no parallelism)")
    public void setParallelism(int threadCount) {
        final boolean positive = threadCount >= 1;
        checkArgument(positive, "thread count must be at least 1.");
        this.m_threadCount = threadCount;
    }

    /**
     * Sets the capacity of the direct poster's work queue.
     *
     * @param maxThreadQueueSize the queue capacity; must be at least 1
     * @throws IllegalArgumentException if {@code maxThreadQueueSize} is not positive
     */
    @Option(name = "-q", aliases = "--max-work-queue-size", metaVar = "batch-count", usage = "when using direct the max size of the work-queue (defaults to thread-count * 3)")
    public void setMaxThreadQueueSize(int maxThreadQueueSize) {
        final boolean positive = maxThreadQueueSize >= 1;
        checkArgument(positive, "max thread queue size must be at least 1.");
        this.m_maxThreadQueueSize = maxThreadQueueSize;
    }

    /**
     * Sets the divisor applied to every sample timestamp (in epoch milliseconds).
     * <p>
     * The factor is used as a divisor in {@code adjustTime()}, so zero, negative, NaN and
     * infinite values would silently produce meaningless timestamps; they are rejected here.
     *
     * @param factor the time-compression divisor; must be finite and greater than zero
     * @throws IllegalArgumentException if {@code factor} is not a finite positive number
     */
    @Option(name = "-f", aliases = "--time-scale-factor", metaVar = "long", usage = "to scale down the date we compress time dividing time by this factor")
    public void setTimescaleFactor(double factor) {
        // "factor > 0" is false for NaN, so NaN is rejected by the same check
        checkArgument(factor > 0 && !Double.isInfinite(factor),
                "time scale factor must be a finite number greater than zero.");
        m_timescaleFactor = factor;
    }

    /**
     * Sets the offset that is added to every (scaled) sample timestamp.
     *
     * @param offset either the literal {@code now}, which selects the current wall-clock time,
     *        or a whole number of epoch seconds
     * @throws NumberFormatException if {@code offset} is neither {@code now} nor a parsable long
     */
    @Option(name = "-o", aliases = "--time-offset", metaVar = "timestamp", usage = "adjust epoch time in seconds to be <time-offset>. defaults to no offset.  'now' is allowed.")
    public void setTimeoffset(String offset) {
        // constant-first comparison so a null value reaches the parser instead of throwing an NPE
        if ("now".equals(offset)) {
            m_timeoffset = System.currentTimeMillis();
            return;
        }
        // parseLong avoids boxing; toMillis saturates instead of silently wrapping on overflow
        m_timeoffset = SECONDS.toMillis(Long.parseLong(offset));
    }

    /**
     * Sets the directory that is scanned for GSOD data files.
     *
     * @param source the directory to import from; must exist and be a directory
     * @throws IllegalArgumentException if {@code source} does not exist or is not a directory
     */
    @Argument(metaVar = "sourceDir", required = true, usage = "the source directory that contains gsod data to import. These must be gzip'd files")
    public void setSource(File source) {
        final boolean present = source.exists();
        final String missingMessage = "the source directory " + source + " does not exist";
        checkArgument(present, missingMessage);

        final boolean directory = source.isDirectory();
        checkArgument(directory, "the source directory must be a directory");

        this.m_source = source;
    }

    // slf4j logger for this class; used by execute() for debug output.
    private static final Logger LOG = LoggerFactory.getLogger(ImportRunner.class);

    /**
     * Command-line entry point: creates a runner and hands it the arguments.
     *
     * @param args the raw command-line arguments
     * @throws Exception whatever {@link #execute(String...)} throws
     */
    public static void main(String... args) throws Exception {
        final ImportRunner runner = new ImportRunner();
        runner.execute(args);
    }

    /**
     * Parses the command line, assembles the import pipeline and blocks until it terminates.
     * <p>
     * Files under the source directory are walked, split into lines, parsed into samples,
     * time-adjusted and grouped into batches of {@code m_samplesPerBatch}. The batches are handed
     * to the REST poster when a URL was given, otherwise to the direct poster.
     * <p>
     * On successful completion the subscriber prints a final report and terminates the JVM with
     * status 0. On failure the error is printed, the subscription is cancelled and the JVM is
     * terminated with status 1. Invalid command-line syntax prints the usage and returns.
     *
     * @param args the raw command-line arguments
     * @throws Exception if the wait for the import to finish is interrupted, or setup fails
     */
    public void execute(String... args) throws Exception {

        CmdLineParser parser = new CmdLineParser(this);
        try {
            parser.parseArgument(args);
        } catch (CmdLineException e) {
            // handling of wrong arguments
            System.err.println(e.getMessage());
            parser.printUsage(System.err);
            return;
        }

        // Registry shared by the console reporter and, for direct imports, the Newts reporter
        MetricRegistry metrics = new MetricRegistry();

        final long start = System.currentTimeMillis();
        metrics.register("elapsed-seconds", new Gauge<Double>() {

            @Override
            public Double getValue() {
                return (System.currentTimeMillis() - start) / 1000.0;
            }

        });

        final ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics).outputTo(System.err)
                .convertRatesTo(SECONDS).convertDurationsTo(MILLISECONDS).build();

        reporter.start(10, SECONDS);

        if (m_restUrl == null) {
            // we are using a direct importer so use a NewtsReporter for storing metrics
            NewtsReporter newtsReporter = NewtsReporter.forRegistry(metrics).name("importer")
                    .convertRatesTo(SECONDS).convertDurationsTo(MILLISECONDS).build(repository());

            newtsReporter.start(1, SECONDS);
        }

        LOG.debug("Scanning {} for GSOD data files...", m_source);

        // walk the files in the directory given
        Observable<Sample> samples = fileTreeWalker(m_source.toPath()).subscribeOn(Schedulers.io())

                // set up a meter for each file processed
                .map(meter(metrics.meter("files"), Path.class))

                // report file
                .map(reportFile())

                // read all the files and convert them into lines
                .mergeMap(lines())
                // excluding the header lines
                .filter(exclude("YEARMODA"))

                // turn each line into a list of samples
                .mergeMap(samples())

                // adjust time on samples according to arguments
                .map(adjustTime())

                // meter the samples
                .map(meter(metrics.meter("samples"), Sample.class));

        Observable<List<Sample>> batches = samples
                // group the samples into batches of m_samplesPerBatch (count-based only)
                .buffer(m_samplesPerBatch);

        Observable<Boolean> doImport = m_restUrl != null ? restPoster(batches, metrics)
                : directPoster(batches, metrics);

        System.err.println("doImport = " + doImport);

        // GO!!!
        final AtomicReference<Subscription> subscription = new AtomicReference<>();
        final AtomicBoolean failed = new AtomicBoolean(false);

        final CountDownLatch latch = new CountDownLatch(1);

        Subscription s = doImport.subscribe(new Observer<Boolean>() {

            @Override
            public void onCompleted() {
                System.err.println("Finished Importing Everything!");
                reporter.report();
                latch.countDown();
                System.exit(0);
            }

            @Override
            public void onError(Throwable e) {
                failed.set(true);
                System.err.println("Error importing!");
                e.printStackTrace();
                try {
                    // may still be null when the error is delivered before subscribe() returns;
                    // that case is handled right after subscribe() below
                    Subscription current = subscription.get();
                    if (current != null)
                        current.unsubscribe();

                } catch (Exception ex) {
                    System.err.println("Failed to close httpClient!");
                    ex.printStackTrace();
                } finally {
                    // release execute(); without this the caller waits on the latch forever
                    latch.countDown();
                }
            }

            @Override
            public void onNext(Boolean t) {
                System.err.println("Received a boolen: " + t);
            }
        });

        subscription.set(s);
        if (failed.get()) {
            s.unsubscribe();
        }
        System.err.println("Return from Subscribe!");

        latch.await();

        if (failed.get()) {
            // mirror the success path, which ends the JVM explicitly: reporter and worker
            // threads started above would otherwise keep a failed import running
            System.exit(1);
        }

    }

    /**
     * Builds the mapping that rewrites a sample's timestamp: the original epoch milliseconds are
     * divided by {@code m_timescaleFactor}, rounded, and shifted by {@code m_timeoffset}. All
     * other sample attributes are carried over unchanged.
     *
     * @return a function producing a new {@link Sample} with the adjusted timestamp
     */
    private Func1<? super Sample, ? extends Sample> adjustTime() {
        return new Func1<Sample, Sample>() {

            @Override
            public Sample call(Sample original) {
                final long scaledMillis = Math.round(original.getTimestamp().asMillis() / m_timescaleFactor);
                final Timestamp shifted = Timestamp.fromEpochMillis(m_timeoffset + scaledMillis);
                return new Sample(shifted, original.getResource(), original.getName(), original.getType(),
                        original.getValue());
            }

        };
    }

    /**
     * Returns the sample repository, creating it through Guice on first use and caching it in
     * {@code m_repository} afterwards.
     *
     * @return the cached {@link SampleRepository}
     */
    private SampleRepository repository() {
        if (m_repository != null) {
            return m_repository;
        }
        m_repository = Guice.createInjector(new Config()).getInstance(SampleRepository.class);
        return m_repository;
    }

    /**
     * Builds the pipeline stage that writes batches straight into the {@link SampleRepository}.
     * <p>
     * Each insert is timed with the "writes" timer, and the batch size is added to the
     * "samples-completed" meter whether or not the insert succeeded. With a thread count of 1
     * the inserts are mapped inline; otherwise they go through the thread pool.
     *
     * @param samples the batches to insert
     * @param metrics the registry the timer and meter are obtained from
     * @return an observable emitting a single boolean: whether every insert reported true
     */
    private Observable<Boolean> directPoster(Observable<List<Sample>> samples, MetricRegistry metrics) {

        final SampleRepository target = repository();
        final Timer writeTimer = metrics.timer("writes");
        final Meter completedSamples = metrics.meter("samples-completed");

        final Func1<List<Sample>, Boolean> insert = new Func1<List<Sample>, Boolean>() {

            @Override
            public Boolean call(List<Sample> batch) {
                final int batchSize = batch.size();
                try (Context timing = writeTimer.time()) {
                    target.insert(batch);
                    return true;
                } finally {
                    completedSamples.mark(batchSize);
                }
            }
        };

        final Observable<Boolean> results;
        if (m_threadCount == 1) {
            results = samples.map(insert);
        } else {
            results = parMap(samples, metrics, insert);
        }
        return results.all(Functions.<Boolean>identity());

    }

    /**
     * Runs {@code insert} for each batch on a fixed-size thread pool whose work queue blocks the
     * producer when it is full.
     * <p>
     * The queue overrides {@code offer} and {@code add} to delegate to the blocking {@code put},
     * so a submit to the executor waits for free capacity instead of being rejected. The time
     * spent waiting is recorded in the "wait-time" timer. Gauges for the pool and queue are
     * registered on {@code metrics}.
     *
     * @param samples the batches to insert
     * @param metrics the registry the timer and gauges are registered on
     * @param insert the function applied to each batch on a pool thread
     * @return an observable of the per-batch insert results
     */
    private Observable<Boolean> parMap(Observable<List<Sample>> samples, MetricRegistry metrics,
            Func1<List<Sample>, Boolean> insert) {

        final Timer waitTime = metrics.timer("wait-time");

        @SuppressWarnings("serial")
        final BlockingQueue<Runnable> workQueue = new LinkedBlockingQueue<Runnable>(
                m_maxThreadQueueSize == 0 ? m_threadCount * 3 : m_maxThreadQueueSize) {

            @Override
            public boolean offer(Runnable r) {
                return enqueueBlocking(r);
            }

            @Override
            public boolean add(Runnable r) {
                return enqueueBlocking(r);
            }

            // Waits for free capacity and records how long the caller was blocked.
            private boolean enqueueBlocking(Runnable r) {
                try (Context time = waitTime.time()) {
                    this.put(r);
                    return true;
                } catch (InterruptedException e) {
                    // keep the interrupt visible to the caller after it is wrapped below
                    Thread.currentThread().interrupt();
                    throw Exceptions.propagate(e);
                }
            }

        };
        final ThreadPoolExecutor executor = new ThreadPoolExecutor(m_threadCount, m_threadCount, 0L,
                TimeUnit.MILLISECONDS, workQueue);

        metrics.register("active-threads", new Gauge<Integer>() {

            @Override
            public Integer getValue() {
                return executor.getActiveCount();
            }

        });

        metrics.register("pool-size", new Gauge<Integer>() {

            @Override
            public Integer getValue() {
                return executor.getPoolSize();
            }

        });
        metrics.register("largest-pool-size", new Gauge<Integer>() {

            @Override
            public Integer getValue() {
                return executor.getLargestPoolSize();
            }

        });

        metrics.register("work-queue-size", new Gauge<Integer>() {

            @Override
            public Integer getValue() {
                return workQueue.size();
            }

        });

        return parMap(samples, executor, metrics, insert);
    }

    /**
     * Submits {@code insert} for each batch to the given executor and emits the results in
     * submission order.
     * <p>
     * The lifted operator turns every batch into a future. A downstream stage on the io
     * scheduler then blocks on each future to obtain its result. The executor is shut down as
     * soon as the upstream terminates, whether by completion or by error, so its threads do not
     * outlive the pipeline.
     *
     * @param samples the batches to insert
     * @param executorSvc the executor the inserts run on; shut down when the upstream terminates
     * @param metrics unused by this overload
     * @param insert the function applied to each batch on an executor thread
     * @return an observable of the per-batch insert results
     */
    private Observable<Boolean> parMap(Observable<List<Sample>> samples, ExecutorService executorSvc,
            final MetricRegistry metrics, final Func1<List<Sample>, Boolean> insert) {
        final ListeningExecutorService executor = MoreExecutors.listeningDecorator(executorSvc);

        Observable<Boolean> o = samples.lift(new Operator<ListenableFuture<Boolean>, List<Sample>>() {

            @Override
            public Subscriber<? super List<Sample>> call(final Subscriber<? super ListenableFuture<Boolean>> s) {
                return new Subscriber<List<Sample>>() {

                    // true once a terminal event was handled; guards against duplicate signals
                    private boolean done = false;

                    @Override
                    public void onCompleted() {
                        if (done) {
                            return;
                        }
                        done = true;
                        try {
                            if (!s.isUnsubscribed()) {
                                s.onCompleted();
                            }
                        } finally {
                            executor.shutdown();
                        }
                    }

                    @Override
                    public void onError(Throwable e) {
                        if (done) {
                            return;
                        }
                        done = true;
                        try {
                            if (!s.isUnsubscribed()) {
                                s.onError(e);
                            }
                        } finally {
                            // release the pool threads on failure as well as on completion
                            executor.shutdown();
                        }
                    }

                    @Override
                    public void onNext(final List<Sample> t) {
                        if (done || s.isUnsubscribed()) {
                            return;
                        }
                        try {
                            ListenableFuture<Boolean> f = executor.submit(new Callable<Boolean>() {

                                @Override
                                public Boolean call() throws Exception {
                                    return insert.call(t);
                                }

                            });
                            s.onNext(f);
                        } catch (Throwable ex) {
                            // stop the upstream before reporting, so no further batches arrive
                            unsubscribe();
                            onError(ex);
                        }
                    }
                };
            }

        }).observeOn(Schedulers.io()).map(new Func1<ListenableFuture<Boolean>, Boolean>() {

            @Override
            public Boolean call(ListenableFuture<Boolean> f) {
                try {
                    return f.get();
                } catch (Throwable e) {
                    throw Exceptions.propagate(e);
                }
            }

        });

        return o;
    }

    /**
     * Builds the pipeline stage that posts batches as JSON to the REST server at
     * {@code m_restUrl}.
     * <p>
     * A new async HTTP client is started for the pipeline and closed when the pipeline
     * terminates, whether by completion or by error. Each response adds
     * {@code m_samplesPerBatch} to the "samples-completed" meter, regardless of how many samples
     * the batch actually carried.
     *
     * @param samples the batches to post
     * @param metrics the registry the "posts", "responses" and "samples-completed" meters come from
     * @return an observable emitting a single boolean once every response was checked; a
     *         response with status 400 or above surfaces as an error instead
     */
    private Observable<Boolean> restPoster(Observable<List<Sample>> samples, MetricRegistry metrics) {

        final CloseableHttpAsyncClient httpClient = HttpAsyncClients.createDefault();
        httpClient.start();

        return samples

                // turn each batch into json
                .map(toJSON())

                // meter them as they go into the post code
                .map(meter(metrics.meter("posts"), String.class))

                // post the json to the REST server
                .mergeMap(postJSON(m_restUrl, httpClient))

                // meter the responses
                .map(meter(metrics.meter("responses"), ObservableHttpResponse.class))

                // count sample completions
                .map(meter(metrics.meter("samples-completed"), m_samplesPerBatch, ObservableHttpResponse.class))

                // make sure every request has a successful return code
                .all(successful())

                // release the client on either terminal event, not only on success
                .doOnEach(new Observer<Boolean>() {

                    @Override
                    public void onCompleted() {
                        closeClient();
                    }

                    @Override
                    public void onError(Throwable e) {
                        closeClient();
                    }

                    @Override
                    public void onNext(Boolean ignored) {
                        // nothing to release per item
                    }

                    // Best-effort close: a failure is reported but does not alter the outcome.
                    private void closeClient() {
                        try {
                            httpClient.close();
                        } catch (IOException e) {
                            System.err.println("Failed to close httpClient!");
                            e.printStackTrace();
                        }
                    }

                });
    }

    /**
     * Builds a pass-through mapping that announces each file on standard error.
     *
     * @return a function that prints the path and returns it unchanged
     */
    private static Func1<? super Path, ? extends Path> reportFile() {
        return new Func1<Path, Path>() {

            @Override
            public Path call(Path dataFile) {
                final String announcement = "Begin Processing: " + dataFile;
                System.err.println(announcement);
                return dataFile;
            }

        };
    }

    /**
     * Builds the mapping from one data line to the samples parsed from it. A single
     * {@code LineParser} is created per call of this factory and shared by the returned function.
     *
     * @return a function that parses a line and emits its samples; a {@link ParseException} is
     *         rethrown through {@code Exceptions.propagate}
     */
    public static Func1<String, Observable<Sample>> samples() {
        final LineParser lineParser = new LineParser();
        return new Func1<String, Observable<Sample>>() {

            @Override
            public Observable<Sample> call(String rawLine) {
                try {
                    return Observable.from(lineParser.parseLine(rawLine));
                } catch (ParseException unparsable) {
                    throw Exceptions.propagate(unparsable);
                }
            }

        };
    }

    /**
     * Tells whether a sample is a gauge whose value is NaN.
     *
     * @param sample the sample to inspect
     * @return {@code true} only for {@code GAUGE} samples with a NaN double value
     */
    private static boolean isNaN(Sample sample) {
        if (sample.getType() != MetricType.GAUGE) {
            return false;
        }
        return Double.isNaN(sample.getValue().doubleValue());
    }

    /**
     * Builds the mapping from a batch of samples to its JSON text.
     * <p>
     * Gauge samples whose value is NaN are left out. Gauge values are written as doubles, all
     * other metric types as longs.
     *
     * @return a function that serializes a batch with {@code JSONBuilder}
     */
    public static Func1<List<Sample>, String> toJSON() {
        return new Func1<List<Sample>, String>() {
            @Override
            public String call(List<Sample> samples) {
                final JSONBuilder json = new JSONBuilder();

                for (Sample current : samples) {
                    if (!isNaN(current)) {
                        json.newObject();
                        json.attr("timestamp", current.getTimestamp().asMillis());
                        json.attr("resource", current.getResource().getId());
                        json.attr("name", current.getName());
                        json.attr("type", current.getType().name());
                        // the two branches stay separate so each picks its own attr overload
                        if (current.getType() != MetricType.GAUGE) {
                            json.attr("value", current.getValue().longValue());
                        } else {
                            json.attr("value", current.getValue().doubleValue());
                        }
                    }
                }

                return json.toString();
            }
        };
    }

    /**
     * Builds the predicate that verifies an HTTP response.
     *
     * @return a function that returns {@code true} for status codes below 400 and throws a
     *         {@link RuntimeException} carrying the status line otherwise
     */
    private static Func1<ObservableHttpResponse, Boolean> successful() {
        return new Func1<ObservableHttpResponse, Boolean>() {

            @Override
            public Boolean call(ObservableHttpResponse response) {
                final int status = response.getResponse().getStatusLine().getStatusCode();
                if (status < 400) {
                    return true;
                }
                throw new RuntimeException("Failed to post samples: " + response.getResponse().getStatusLine());
            }

        };
    }

    /**
     * Builds the mapping that posts a JSON document to the given URL.
     * <p>
     * The URL is parsed once, when this factory is called, so an invalid URL fails here rather
     * than on the first post.
     *
     * @param baseURL the URL every document is posted to
     * @param httpClient the async client used to send the requests
     * @return a function turning a JSON string into the observable of its HTTP response
     */
    public static Func1<String, Observable<ObservableHttpResponse>> postJSON(final String baseURL,
            final CloseableHttpAsyncClient httpClient) {

        final URI endpoint = URI.create(baseURL);

        return new Func1<String, Observable<ObservableHttpResponse>>() {
            @Override
            public Observable<ObservableHttpResponse> call(String payload) {
                try {
                    return ObservableHttp.createRequest(
                            HttpAsyncMethods.createPost(endpoint, payload, ContentType.APPLICATION_JSON),
                            httpClient).toObservable();
                } catch (UnsupportedEncodingException badEncoding) {
                    throw Exceptions.propagate(badEncoding);
                }
            }
        };
    }

    /**
     * Builds a filter predicate that drops strings containing the given text.
     *
     * @param pattern the literal text to look for (a plain substring, not a regular expression)
     * @return a function returning {@code true} for strings that do not contain {@code pattern}
     */
    public static Func1<String, Boolean> exclude(final String pattern) {
        return new Func1<String, Boolean>() {

            @Override
            public Boolean call(String candidate) {
                final boolean found = candidate.contains(pattern);
                return found ? Boolean.FALSE : Boolean.TRUE;
            }

        };
    }

    /**
     * Builds a pass-through mapping that marks the meter once per item.
     *
     * @param meter the meter to mark
     * @param clazz type token that fixes {@code T} at the call site
     * @param <T> the item type flowing through the mapping
     * @return a function that marks the meter by one and returns its argument unchanged
     */
    public static <T> Func1<T, T> meter(final Meter meter, Class<T> clazz) {
        final int onePerItem = 1;
        return meter(meter, onePerItem, clazz);
    }

    /**
     * Builds a pass-through mapping that marks the meter by a fixed amount per item.
     *
     * @param meter the meter to mark
     * @param count the amount added to the meter for every item
     * @param clazz type token that fixes {@code T} at the call site; not otherwise used
     * @param <T> the item type flowing through the mapping
     * @return a function that marks the meter and returns its argument unchanged
     */
    public static <T> Func1<T, T> meter(final Meter meter, final int count, Class<T> clazz) {
        final Func1<T, T> marking = new Func1<T, T>() {

            @Override
            public T call(T item) {
                meter.mark(count);
                return item;
            }

        };
        return marking;
    }

}