/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.materializedview;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningScheduledExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.inject.Inject;
import org.apache.druid.guice.ManageLifecycleLast;
import org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata;
import org.apache.druid.indexing.overlord.DataSourceMetadata;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.metadata.MetadataStorageTablesConfig;
import org.apache.druid.metadata.SQLMetadataConnector;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.skife.jdbi.v2.Handle;
import org.skife.jdbi.v2.StatementContext;
import org.skife.jdbi.v2.tweak.HandleCallback;
import org.skife.jdbi.v2.tweak.ResultSetMapper;

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

/**
 * Reads and stores information about derivative datasources from the dataSource metadata table
 * at a regular interval. During query optimization, DerivativeDataSourceManager offers the
 * information about available derivatives.
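 *
 * <p>The refresh cadence is driven by {@link MaterializedViewConfig#getPollDuration()}. Lookups
 * are served from an in-memory map through the static getters, so readers never query the
 * metadata store directly.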
 */
@ManageLifecycleLast
public class DerivativeDataSourceManager
{
  private static final EmittingLogger log = new EmittingLogger(DerivativeDataSourceManager.class);
  private static final AtomicReference<ConcurrentHashMap<String, SortedSet<DerivativeDataSource>>> derivativesRef =
      new AtomicReference<>(new ConcurrentHashMap<>());

  private final MaterializedViewConfig config;
  private final Supplier<MetadataStorageTablesConfig> dbTables;
  private final SQLMetadataConnector connector;
  private final ObjectMapper objectMapper;
  private final Object lock = new Object();

  private boolean started = false;
  private ListeningScheduledExecutorService exec = null;
  private ListenableFuture<?> future = null;

  @Inject
  public DerivativeDataSourceManager(
      MaterializedViewConfig config,
      Supplier<MetadataStorageTablesConfig> dbTables,
      ObjectMapper objectMapper,
      SQLMetadataConnector connector
  )
  {
    this.config = config;
    this.dbTables = dbTables;
    this.objectMapper = objectMapper;
    this.connector = connector;
  }

  @LifecycleStart
  public void start()
  {
    log.info("starting derivatives manager.");
    synchronized (lock) {
      if (started) {
        return;
      }
      exec = MoreExecutors.listeningDecorator(Execs.scheduledSingleThreaded("DerivativeDataSourceManager-Exec-%d"));
      final Duration delay = config.getPollDuration().toStandardDuration();
      // refresh the derivative information on a fixed-delay schedule
      future = exec.scheduleWithFixedDelay(
          new Runnable()
          {
            @Override
            public void run()
            {
              try {
                updateDerivatives();
              }
              catch (Exception e) {
                log.makeAlert(e, "uncaught exception in derivatives manager updating thread").emit();
              }
            }
          },
          0,
          delay.getMillis(),
          TimeUnit.MILLISECONDS
      );
      started = true;
    }
    log.info("Derivatives manager started.");
  }

  @LifecycleStop
  public void stop()
  {
    synchronized (lock) {
      if (!started) {
        return;
      }
      started = false;
      future.cancel(true);
      future = null;
      derivativesRef.set(new ConcurrentHashMap<>());
      exec.shutdownNow();
      exec = null;
    }
  }

  public static ImmutableSet<DerivativeDataSource> getDerivatives(String datasource)
  {
    return ImmutableSet.copyOf(derivativesRef.get().getOrDefault(datasource, Sets.newTreeSet()));
  }

  public static ImmutableMap<String, Set<DerivativeDataSource>> getAllDerivatives()
  {
    return ImmutableMap.copyOf(derivativesRef.get());
  }

  private void updateDerivatives()
  {
    // read every (dataSource, commit_metadata_payload) pair, keeping only materialized-view metadata
    List<Pair<String, DerivativeDataSourceMetadata>> derivativesInDatabase = connector.retryWithHandle(
        handle -> handle
            .createQuery(
                StringUtils.format(
                    "SELECT DISTINCT dataSource,commit_metadata_payload FROM %1$s",
                    dbTables.get().getDataSourceTable()
                )
            )
            .map(new ResultSetMapper<Pair<String, DerivativeDataSourceMetadata>>()
            {
              @Override
              public Pair<String, DerivativeDataSourceMetadata> map(int index, ResultSet r, StatementContext ctx)
                  throws SQLException
              {
                String datasourceName = r.getString("dataSource");
                try {
                  DataSourceMetadata payload = objectMapper.readValue(
                      r.getBytes("commit_metadata_payload"),
                      DataSourceMetadata.class
                  );
                  if (!(payload instanceof DerivativeDataSourceMetadata)) {
                    return null;
                  }
                  DerivativeDataSourceMetadata metadata = (DerivativeDataSourceMetadata) payload;
                  return new Pair<>(datasourceName, metadata);
                }
                catch (IOException e) {
                  throw new RuntimeException(e);
                }
              }
            })
            .list()
    );

    List<DerivativeDataSource> derivativeDataSources = derivativesInDatabase.parallelStream()
        .filter(data -> data != null)
        .map(derivatives -> {
          String name = derivatives.lhs;
          DerivativeDataSourceMetadata metadata = derivatives.rhs;
          String baseDataSource = metadata.getBaseDataSource();
          long avgSizePerGranularity = getAvgSizePerGranularity(name);
          log.info(
              "found derivatives: {bases=%s, derivative=%s, dimensions=%s, metrics=%s, avgSize=%s}",
              baseDataSource,
              name,
              metadata.getDimensions(),
              metadata.getMetrics(),
              avgSizePerGranularity
          );
          return new DerivativeDataSource(name, baseDataSource, metadata.getColumns(), avgSizePerGranularity);
        })
        .filter(derivatives -> derivatives.getAvgSizeBasedGranularity() > 0)
        .collect(Collectors.toList());

    // group the derivatives by their base datasource
    ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> newDerivatives = new ConcurrentHashMap<>();
    for (DerivativeDataSource derivative : derivativeDataSources) {
      newDerivatives.putIfAbsent(derivative.getBaseDataSource(), Sets.newTreeSet());
      newDerivatives.get(derivative.getBaseDataSource()).add(derivative);
    }
    // swap the freshly built map in atomically, so readers never observe a partial update
    ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> current;
    do {
      current = derivativesRef.get();
    } while (!derivativesRef.compareAndSet(current, newDerivatives));
  }
  /**
   * Calculate the average data size per segment granularity for a given datasource.
   *
   * e.g. for a datasource with the following 5 segments:
   * interval = "2018-04-01/2018-04-02", segment size = 1024 * 1024 * 2
   * interval = "2018-04-01/2018-04-02", segment size = 1024 * 1024 * 2
   * interval = "2018-04-02/2018-04-03", segment size = 1024 * 1024 * 1
   * interval = "2018-04-02/2018-04-03", segment size = 1024 * 1024 * 1
   * interval = "2018-04-02/2018-04-03", segment size = 1024 * 1024 * 1
   * we get 2 distinct intervals and a total segment size of 1024 * 1024 * 7.
   * The result is therefore 1024 * 1024 * 7 / 2 = 1024 * 1024 * 3.5.
   *
   * @param datasource name of the datasource
   * @return average data size per segment granularity
   */
  private long getAvgSizePerGranularity(String datasource)
  {
    return connector.retryWithHandle(
        new HandleCallback<Long>()
        {
          Set<Interval> intervals = Sets.newHashSet();
          long totalSize = 0;

          @Override
          public Long withHandle(Handle handle)
          {
            handle.createQuery(
                StringUtils.format(
                    "SELECT start,%1$send%1$s,payload FROM %2$s WHERE used = true AND dataSource = :dataSource",
                    connector.getQuoteString(),
                    dbTables.get().getSegmentsTable()
                )
            )
                .bind("dataSource", datasource)
                .map(new ResultSetMapper<Object>()
                {
                  @Override
                  public Object map(int index, ResultSet r, StatementContext ctx) throws SQLException
                  {
                    try {
                      intervals.add(
                          Intervals.utc(
                              DateTimes.of(r.getString("start")).getMillis(),
                              DateTimes.of(r.getString("end")).getMillis()
                          )
                      );
                      DataSegment segment = objectMapper.readValue(r.getBytes("payload"), DataSegment.class);
                      totalSize += segment.getSize();
                    }
                    catch (IOException e) {
                      throw new RuntimeException(e);
                    }
                    return null;
                  }
                })
                .first(); // forces the query to run; the mapper accumulates intervals and totalSize as side effects
            return intervals.isEmpty() ? 0L : totalSize / intervals.size();
          }
        }
    );
  }
}
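
// A minimal usage sketch, not part of the original class: it shows how a caller (for example,
// the materialized-view query planner) might read the derivative information that the manager
// keeps refreshed in the background. The class name and the "wikipedia" datasource name are
// illustrative assumptions; only the static getters above are real API.
class DerivativeDataSourceManagerUsageSketch
{
  static void printDerivativesOf(String baseDataSource)
  {
    // getDerivatives returns an immutable copy, so iterating here is safe even while
    // the background poll thread swaps in a freshly built map.
    for (DerivativeDataSource derivative : DerivativeDataSourceManager.getDerivatives(baseDataSource)) {
      System.out.println(
          "derivative of " + derivative.getBaseDataSource()
          + " with avg size per granularity = " + derivative.getAvgSizeBasedGranularity()
      );
    }
  }

  public static void main(String[] args)
  {
    printDerivativesOf("wikipedia");
  }
}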