org.apache.druid.query.materializedview.DerivativeDataSourceManager.java Source code

Introduction

Here is the source code for org.apache.druid.query.materializedview.DerivativeDataSourceManager.java. This class periodically polls the Druid metadata store for derivative dataSources and caches the results so they can be consulted during materialized-view query optimization.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.materializedview;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningScheduledExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.inject.Inject;
import org.apache.druid.guice.ManageLifecycleLast;
import org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata;
import org.apache.druid.indexing.overlord.DataSourceMetadata;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.metadata.MetadataStorageTablesConfig;
import org.apache.druid.metadata.SQLMetadataConnector;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.skife.jdbi.v2.Handle;
import org.skife.jdbi.v2.StatementContext;
import org.skife.jdbi.v2.tweak.HandleCallback;
import org.skife.jdbi.v2.tweak.ResultSetMapper;

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

/**
 * Periodically reads derivative dataSource information from the dataSource table and caches it.
 * During query optimization, DerivativeDataSourceManager supplies the cached information about
 * available derivatives.
 */
@ManageLifecycleLast
public class DerivativeDataSourceManager {
    private static final EmittingLogger log = new EmittingLogger(DerivativeDataSourceManager.class);
    private static final AtomicReference<ConcurrentHashMap<String, SortedSet<DerivativeDataSource>>> derivativesRef = new AtomicReference<>(
            new ConcurrentHashMap<>());
    private final MaterializedViewConfig config;
    private final Supplier<MetadataStorageTablesConfig> dbTables;
    private final SQLMetadataConnector connector;
    private final ObjectMapper objectMapper;
    private final Object lock = new Object();

    private boolean started = false;
    private ListeningScheduledExecutorService exec = null;
    private ListenableFuture<?> future = null;

    @Inject
    public DerivativeDataSourceManager(MaterializedViewConfig config,
            Supplier<MetadataStorageTablesConfig> dbTables, ObjectMapper objectMapper,
            SQLMetadataConnector connector) {
        this.config = config;
        this.dbTables = dbTables;
        this.objectMapper = objectMapper;
        this.connector = connector;
    }

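    /**
     * Start the periodic polling of the metadata store; invoked by the Druid lifecycle.
     */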
    @LifecycleStart
    public void start() {
        log.info("starting derivatives manager.");
        synchronized (lock) {
            if (started) {
                return;
            }
            exec = MoreExecutors
                    .listeningDecorator(Execs.scheduledSingleThreaded("DerivativeDataSourceManager-Exec-%d"));
            final Duration delay = config.getPollDuration().toStandardDuration();
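            // Schedule the polling task with a fixed delay equal to the configured poll
            // duration, starting immediately.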
            future = exec.scheduleWithFixedDelay(new Runnable() {
                @Override
                public void run() {
                    try {
                        updateDerivatives();
                    } catch (Exception e) {
                        log.makeAlert(e, "uncaught exception in derivatives manager updating thread").emit();
                    }
                }
            }, 0, delay.getMillis(), TimeUnit.MILLISECONDS);
            started = true;
        }
        log.info("Derivatives manager started.");
    }

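    /**
     * Cancel the polling task, clear the cached derivatives, and shut down the executor.
     */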
    @LifecycleStop
    public void stop() {
        synchronized (lock) {
            if (!started) {
                return;
            }
            started = false;
            future.cancel(true);
            future = null;
            derivativesRef.set(new ConcurrentHashMap<>());
            exec.shutdownNow();
            exec = null;
        }
    }

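    /**
     * Return the derivatives registered for the given base dataSource, or an empty set if
     * none are known yet.
     */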
    public static ImmutableSet<DerivativeDataSource> getDerivatives(String datasource) {
        return ImmutableSet.copyOf(derivativesRef.get().getOrDefault(datasource, Sets.newTreeSet()));
    }

    public static ImmutableMap<String, Set<DerivativeDataSource>> getAllDerivatives() {
        return ImmutableMap.copyOf(derivativesRef.get());
    }

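    /**
     * Read all (dataSource, commit_metadata_payload) pairs from the dataSource table, keep the
     * ones carrying DerivativeDataSourceMetadata, and rebuild the base-to-derivatives mapping.
     */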
    private void updateDerivatives() {
        List<Pair<String, DerivativeDataSourceMetadata>> derivativesInDatabase = connector
                .retryWithHandle(handle -> handle
                        .createQuery(
                                StringUtils.format("SELECT DISTINCT dataSource,commit_metadata_payload FROM %1$s",
                                        dbTables.get().getDataSourceTable()))
                        .map(new ResultSetMapper<Pair<String, DerivativeDataSourceMetadata>>() {
                            @Override
                            public Pair<String, DerivativeDataSourceMetadata> map(int index, ResultSet r,
                                    StatementContext ctx) throws SQLException {
                                String datasourceName = r.getString("dataSource");
                                try {
                                    DataSourceMetadata payload = objectMapper.readValue(
                                            r.getBytes("commit_metadata_payload"), DataSourceMetadata.class);
                                    if (!(payload instanceof DerivativeDataSourceMetadata)) {
                                        return null;
                                    }
                                    DerivativeDataSourceMetadata metadata = (DerivativeDataSourceMetadata) payload;
                                    return new Pair<>(datasourceName, metadata);
                                } catch (IOException e) {
                                    throw new RuntimeException(e);
                                }
                            }
                        }).list());

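        // Turn the raw rows into DerivativeDataSource objects, skipping rows whose metadata
        // was not DerivativeDataSourceMetadata (mapped to null above) and derivatives whose
        // average size per granularity is 0, i.e. those with no used segments.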
        List<DerivativeDataSource> derivativeDataSources = derivativesInDatabase.parallelStream()
                .filter(data -> data != null).map(derivatives -> {
                    String name = derivatives.lhs;
                    DerivativeDataSourceMetadata metadata = derivatives.rhs;
                    String baseDataSource = metadata.getBaseDataSource();
                    long avgSizePerGranularity = getAvgSizePerGranularity(name);
                    log.info("find derivatives: {bases=%s, derivative=%s, dimensions=%s, metrics=%s, avgSize=%s}",
                            baseDataSource, name, metadata.getDimensions(), metadata.getMetrics(),
                            avgSizePerGranularity);
                    return new DerivativeDataSource(name, baseDataSource, metadata.getColumns(),
                            avgSizePerGranularity);
                }).filter(derivatives -> derivatives.getAvgSizeBasedGranularity() > 0).collect(Collectors.toList());

        ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> newDerivatives = new ConcurrentHashMap<>();
        for (DerivativeDataSource derivative : derivativeDataSources) {
            newDerivatives.putIfAbsent(derivative.getBaseDataSource(), Sets.newTreeSet());
            newDerivatives.get(derivative.getBaseDataSource()).add(derivative);
        }
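        // Publish the rebuilt map atomically via compare-and-set so readers of derivativesRef
        // always see a complete snapshot rather than a partially updated map.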
        ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> current;
        do {
            current = derivativesRef.get();
        } while (!derivativesRef.compareAndSet(current, newDerivatives));
    }

    /**
     * Calculate the average data size per segment granularity for a given datasource.
     * 
     * e.g. for a datasource, there are 5 segments as follows:
     * interval = "2018-04-01/2018-04-02", segment size = 1024 * 1024 * 2
     * interval = "2018-04-01/2018-04-02", segment size = 1024 * 1024 * 2
     * interval = "2018-04-02/2018-04-03", segment size = 1024 * 1024 * 1
     * interval = "2018-04-02/2018-04-03", segment size = 1024 * 1024 * 1
     * interval = "2018-04-02/2018-04-03", segment size = 1024 * 1024 * 1
     * Then we get interval number = 2 and total segment size = 1024 * 1024 * 7,
     * so the result is 1024 * 1024 * 7 / 2 = 1024 * 1024 * 3.5.
     * 
     * @param datasource name of the datasource whose used segments are measured
     * @return average data size per segment granularity
     */
    private long getAvgSizePerGranularity(String datasource) {
        return connector.retryWithHandle(new HandleCallback<Long>() {
            Set<Interval> intervals = Sets.newHashSet();
            long totalSize = 0;

            @Override
            public Long withHandle(Handle handle) {
                handle.createQuery(StringUtils.format(
                        "SELECT start,%1$send%1$s,payload FROM %2$s WHERE used = true AND dataSource = :dataSource",
                        connector.getQuoteString(), dbTables.get().getSegmentsTable()))
                        .bind("dataSource", datasource).map(new ResultSetMapper<Object>() {
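                            // Side-effecting mapper: while the result set is iterated it records
                            // each segment's interval and adds its size to totalSize; the mapped
                            // value itself is discarded.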
                            @Override
                            public Object map(int index, ResultSet r, StatementContext ctx) throws SQLException {
                                try {
                                    intervals.add(Intervals.utc(DateTimes.of(r.getString("start")).getMillis(),
                                            DateTimes.of(r.getString("end")).getMillis()));
                                    DataSegment segment = objectMapper.readValue(r.getBytes("payload"),
                                            DataSegment.class);
                                    totalSize += segment.getSize();
                                } catch (IOException e) {
                                    throw new RuntimeException(e);
                                }
                                return null;
                            }
                        }).first();
                return intervals.isEmpty() ? 0L : totalSize / intervals.size();
            }
        });
    }
}
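
For context, here is a minimal sketch of how the information cached by this class might be consumed. It is an illustrative example, not part of the Druid source: the class name DerivativeLookupExample and the base dataSource name "wikiticker" are assumptions, while getDerivatives(), getBaseDataSource() and getAvgSizeBasedGranularity() come from the code above.

import com.google.common.collect.ImmutableSet;

import org.apache.druid.query.materializedview.DerivativeDataSource;
import org.apache.druid.query.materializedview.DerivativeDataSourceManager;

// Hypothetical caller that inspects the derivatives discovered by the manager's polling task.
public class DerivativeLookupExample {
    public static void main(String[] args) {
        // "wikiticker" is an assumed base dataSource name; the lookup returns an empty set
        // if the manager has not discovered any derivatives for it yet.
        ImmutableSet<DerivativeDataSource> derivatives =
                DerivativeDataSourceManager.getDerivatives("wikiticker");

        for (DerivativeDataSource derivative : derivatives) {
            System.out.println("base=" + derivative.getBaseDataSource()
                    + ", avgSizePerGranularity=" + derivative.getAvgSizeBasedGranularity());
        }
    }
}

The static accessors read the same AtomicReference that updateDerivatives() swaps with compare-and-set, so a caller always observes a complete snapshot of the derivative map.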