Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.druid.metadata; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.util.concurrent.Futures; import com.google.errorprone.annotations.concurrent.GuardedBy; import com.google.inject.Inject; import org.apache.druid.client.DataSourcesSnapshot; import org.apache.druid.client.ImmutableDruidDataSource; import org.apache.druid.guice.ManageLifecycle; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.JodaUtils; import org.apache.druid.java.util.common.MapUtils; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.lifecycle.LifecycleStart; import org.apache.druid.java.util.common.lifecycle.LifecycleStop; import org.apache.druid.java.util.emitter.EmittingLogger; import 
org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.VersionedIntervalTimeline; import org.checkerframework.checker.nullness.qual.MonotonicNonNull; import org.joda.time.DateTime; import org.joda.time.Duration; import org.joda.time.Interval; import org.skife.jdbi.v2.BaseResultSetMapper; import org.skife.jdbi.v2.Batch; import org.skife.jdbi.v2.FoldController; import org.skife.jdbi.v2.Handle; import org.skife.jdbi.v2.Query; import org.skife.jdbi.v2.StatementContext; import org.skife.jdbi.v2.TransactionCallback; import org.skife.jdbi.v2.TransactionStatus; import org.skife.jdbi.v2.tweak.ResultSetMapper; import javax.annotation.Nullable; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.stream.Collectors; import java.util.stream.StreamSupport; /** * */ @ManageLifecycle public class SQLMetadataSegmentManager implements MetadataSegmentManager { private static final EmittingLogger log = new EmittingLogger(SQLMetadataSegmentManager.class); /** * Marker interface for objects stored in {@link #latestDatabasePoll}. See the comment for that field for details. */ private interface DatabasePoll { } /** Represents periodic {@link #poll}s happening from {@link #exec}. 
*/
  private static class PeriodicDatabasePoll implements DatabasePoll
  {
    /**
     * This future allows to wait until {@link #dataSourcesSnapshot} is initialized in the first {@link #poll()}
     * happening since {@link #startPollingDatabasePeriodically()} is called for the first time, or since the last
     * visible (in happens-before terms) call to {@link #startPollingDatabasePeriodically()} in case of Coordinator's
     * leadership changes.
     */
    final CompletableFuture<Void> firstPollCompletionFuture = new CompletableFuture<>();
  }

  /**
   * Represents on-demand {@link #poll} initiated at periods of time when SQLMetadataSegmentManager doesn't poll the
   * database periodically.
   */
  private static class OnDemandDatabasePoll implements DatabasePoll
  {
    /** {@link System#nanoTime()} reading taken when this object is constructed. */
    final long initiationTimeNanos = System.nanoTime();

    /**
     * Completed normally after a successful {@code poll()} and exceptionally after a failed one, see
     * {@code doOnDemandPoll()} in the enclosing class.
     */
    final CompletableFuture<Void> pollCompletionFuture = new CompletableFuture<>();

    /** Returns the number of nanoseconds elapsed since this object was constructed. */
    long nanosElapsedFromInitiation()
    {
      return System.nanoTime() - initiationTimeNanos;
    }
  }

  /**
   * Used to synchronize {@link #startPollingDatabasePeriodically}, {@link #stopPollingDatabasePeriodically}, {@link
   * #poll}, and {@link #isPollingDatabasePeriodically}. These methods should be synchronized to prevent them from
   * being called at the same time if two different threads are calling them. This might be possible if Coordinator
   * gets and drops leadership repeatedly in quick succession.
   *
   * This lock is also used to synchronize {@link #awaitOrPerformDatabasePoll} for times when
   * SQLMetadataSegmentManager is not polling the database periodically (in other words, when the Coordinator is not
   * the leader).
   */
  private final ReentrantReadWriteLock startStopPollLock = new ReentrantReadWriteLock();

  /**
   * Used to ensure that {@link #poll()} is never run concurrently.
It should already be so (at least in production * code), where {@link #poll()} is called only from the task created in {@link #createPollTaskForStartOrder} and is * scheduled in a single-threaded {@link #exec}, so this lock is an additional safety net in case there are bugs in * the code, and for tests, where {@link #poll()} is called from the outside code. * * Not using {@link #startStopPollLock}.writeLock() in order to still be able to run {@link #poll()} concurrently * with {@link #isPollingDatabasePeriodically()}. */ private final Object pollLock = new Object(); private final ObjectMapper jsonMapper; private final Duration periodicPollDelay; private final Supplier<MetadataStorageTablesConfig> dbTables; private final SQLMetadataConnector connector; /** * This field is made volatile to avoid "ghost secondary reads" that may result in NPE, see * https://github.com/code-review-checklists/java-concurrency#safe-local-dcl (note that dataSourcesSnapshot resembles * a lazily initialized field). Alternative is to always read the field in a snapshot local variable, but it's too * easy to forget to do. * * This field may be updated from {@link #exec}, or from whatever thread calling {@link #doOnDemandPoll} via {@link * #awaitOrPerformDatabasePoll()} via one of the public methods of SqlSegmentsMetadata. */ private volatile @MonotonicNonNull DataSourcesSnapshot dataSourcesSnapshot = null; /** * The latest {@link DatabasePoll} represent {@link #poll()} calls which update {@link #dataSourcesSnapshot}, either * periodically (see {@link PeriodicDatabasePoll}, {@link #startPollingDatabasePeriodically}, {@link * #stopPollingDatabasePeriodically}) or "on demand" (see {@link OnDemandDatabasePoll}), when one of the methods that * accesses {@link #dataSourcesSnapshot}'s state (such as {@link #getImmutableDataSourceWithUsedSegments}) is * called when the Coordinator is not the leader and therefore SqlSegmentsMetadata isn't polling the database * periodically. 
*
   * Note that if there is a happens-before relationship between a call to {@link #startPollingDatabasePeriodically()}
   * (on Coordinators' leadership change) and one of the methods accessing the {@link #dataSourcesSnapshot}'s state in
   * this class, the latter is guaranteed to await the initiated periodic poll. This is because when the latter
   * method calls {@link #awaitLatestDatabasePoll()} via {@link #awaitOrPerformDatabasePoll}, it will
   * see the latest {@link PeriodicDatabasePoll} value (stored in this field, latestDatabasePoll, in {@link
   * #startPollingDatabasePeriodically()}) and await on its {@link PeriodicDatabasePoll#firstPollCompletionFuture}.
   *
   * However, the guarantee explained above doesn't make any actual semantic difference, because on both periodic and
   * on-demand database polls the same invariant is maintained: only results not older than {@link
   * #periodicPollDelay} are used. The main difference is in performance: since on-demand polls are irregular and happen
   * in the context of the thread wanting to access the {@link #dataSourcesSnapshot}, they may cause delays in the
   * logic. On the other hand, periodic polls are decoupled into {@link #exec}, so {@link
   * #dataSourcesSnapshot}-accessing methods should be generally "wait free" for database polls.
   *
   * The notion and the complexity of "on demand" database polls were introduced to simplify the interface of {@link
   * MetadataSegmentManager} and guarantee that it always returns consistent and relatively up-to-date data from methods
   * like {@link #getImmutableDataSourceWithUsedSegments}, while avoiding excessive repetitive polls. The last part
   * is achieved via "hooking on" other polls by awaiting on {@link PeriodicDatabasePoll#firstPollCompletionFuture} or
   * {@link OnDemandDatabasePoll#pollCompletionFuture}, see the {@link #awaitOrPerformDatabasePoll} method
   * implementation for details.
* * Note: the overall implementation of periodic/on-demand polls is not completely optimal: for example, when the * Coordinator just stopped leading, the latest periodic {@link #poll} (which is still "fresh") is not considered * and a new on-demand poll is always initiated. This is done to simplify the implementation, while the efficiency * during Coordinator leadership switches is not a priority. * * This field is {@code volatile} because it's checked and updated in a double-checked locking manner in {@link * #awaitOrPerformDatabasePoll()}. */ private volatile @Nullable DatabasePoll latestDatabasePoll = null; /** Used to cancel periodic poll task in {@link #stopPollingDatabasePeriodically}. */ @GuardedBy("startStopPollLock") private @Nullable Future<?> periodicPollTaskFuture = null; /** The number of times {@link #startPollingDatabasePeriodically} was called. */ @GuardedBy("startStopPollLock") private long startPollingCount = 0; /** * Equal to the current {@link #startPollingCount} value if the SqlSegmentsMetadata is currently started; -1 if * currently stopped. * * This field is used to implement a simple stamp mechanism instead of just a boolean "started" flag to prevent * the theoretical situation of two or more tasks scheduled in {@link #startPollingDatabasePeriodically()} calling * {@link #isPollingDatabasePeriodically()} and {@link #poll()} concurrently, if the sequence of {@link * #startPollingDatabasePeriodically()} - {@link #stopPollingDatabasePeriodically()} - {@link * #startPollingDatabasePeriodically()} actions occurs quickly. * * {@link SQLMetadataRuleManager} also has a similar issue. 
*/
  @GuardedBy("startStopPollLock")
  private long currentStartPollingOrder = -1;

  /**
   * Executor on which the periodic poll task is scheduled. Created in {@link #start()} and shut down and reset to
   * null in {@link #stop()}.
   */
  @GuardedBy("startStopPollLock")
  private @Nullable ScheduledExecutorService exec = null;

  /**
   * Stores the given collaborators and derives {@link #periodicPollDelay} from the poll duration of the supplied
   * config. Note that the config supplier is read once, here, while {@code dbTables} is kept as a supplier.
   *
   * @param jsonMapper mapper used to deserialize segment payloads read from the database
   * @param config     supplier of the config providing the poll duration
   * @param dbTables   supplier of the metadata storage tables config (the segments table name is read from it)
   * @param connector  connector used to run all database statements of this class
   */
  @Inject
  public SQLMetadataSegmentManager(
      ObjectMapper jsonMapper,
      Supplier<MetadataSegmentManagerConfig> config,
      Supplier<MetadataStorageTablesConfig> dbTables,
      SQLMetadataConnector connector
  )
  {
    this.jsonMapper = jsonMapper;
    this.periodicPollDelay = config.get().getPollDuration().toStandardDuration();
    this.dbTables = dbTables;
    this.connector = connector;
  }

  /**
   * Don't confuse this method with {@link #startPollingDatabasePeriodically}. This is a lifecycle starting method to
   * be executed just once for an instance of this class. It only creates {@link #exec}; calling it again while
   * {@link #exec} is already set is a no-op.
   */
  @LifecycleStart
  public void start()
  {
    // The write lock is taken because exec is guarded by startStopPollLock.
    ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock();
    lock.lock();
    try {
      if (exec != null) {
        return; // Already started
      }
      exec = Execs.scheduledSingleThreaded(getClass().getName() + "-Exec--%d");
    }
    finally {
      lock.unlock();
    }
  }

  /**
   * Don't confuse this method with {@link #stopPollingDatabasePeriodically}. This is a lifecycle stopping method to
   * be executed just once for an instance of this class.
*/ @LifecycleStop public void stop() { ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock(); lock.lock(); try { exec.shutdownNow(); exec = null; } finally { lock.unlock(); } } @Override public void startPollingDatabasePeriodically() { ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock(); lock.lock(); try { if (exec == null) { throw new IllegalStateException(getClass().getName() + " is not started"); } if (isPollingDatabasePeriodically()) { return; } PeriodicDatabasePoll periodicPollUpdate = new PeriodicDatabasePoll(); latestDatabasePoll = periodicPollUpdate; startPollingCount++; currentStartPollingOrder = startPollingCount; final long localStartOrder = currentStartPollingOrder; periodicPollTaskFuture = exec.scheduleWithFixedDelay( createPollTaskForStartOrder(localStartOrder, periodicPollUpdate), 0, periodicPollDelay.getMillis(), TimeUnit.MILLISECONDS); } finally { lock.unlock(); } } private Runnable createPollTaskForStartOrder(long startOrder, PeriodicDatabasePoll periodicPollUpdate) { return () -> { // poll() is synchronized together with startPollingDatabasePeriodically(), stopPollingDatabasePeriodically() and // isPollingDatabasePeriodically() to ensure that when stopPollingDatabasePeriodically() exits, poll() won't // actually run anymore after that (it could only enter the synchronized section and exit immediately because the // localStartedOrder doesn't match the new currentStartPollingOrder). It's needed to avoid flakiness in // SqlSegmentsMetadataTest. 
See https://github.com/apache/incubator-druid/issues/6028 ReentrantReadWriteLock.ReadLock lock = startStopPollLock.readLock(); lock.lock(); try { if (startOrder == currentStartPollingOrder) { poll(); periodicPollUpdate.firstPollCompletionFuture.complete(null); } else { log.debug("startOrder = currentStartPollingOrder = %d, skipping poll()", startOrder); } } catch (Throwable t) { log.makeAlert(t, "Uncaught exception in %s's polling thread", SQLMetadataSegmentManager.class) .emit(); // Swallow the exception, so that scheduled polling goes on. Leave firstPollFutureSinceLastStart uncompleted // for now, so that it may be completed during the next poll. if (!(t instanceof Exception)) { // Don't try to swallow a Throwable which is not an Exception (that is, a Error). periodicPollUpdate.firstPollCompletionFuture.completeExceptionally(t); throw t; } } finally { lock.unlock(); } }; } @Override public boolean isPollingDatabasePeriodically() { // isPollingDatabasePeriodically() is synchronized together with startPollingDatabasePeriodically(), // stopPollingDatabasePeriodically() and poll() to ensure that the latest currentStartPollingOrder is always // visible. readLock should be used to avoid unexpected performance degradation of DruidCoordinator. ReentrantReadWriteLock.ReadLock lock = startStopPollLock.readLock(); lock.lock(); try { return currentStartPollingOrder >= 0; } finally { lock.unlock(); } } @Override public void stopPollingDatabasePeriodically() { ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock(); lock.lock(); try { if (!isPollingDatabasePeriodically()) { return; } periodicPollTaskFuture.cancel(false); latestDatabasePoll = null; // NOT nulling dataSourcesSnapshot, allowing to query the latest polled data even when this SegmentsMetadata // object is stopped. 
currentStartPollingOrder = -1; } finally { lock.unlock(); } } private void awaitOrPerformDatabasePoll() { // Double-checked locking with awaitLatestDatabasePoll() call playing the role of the "check". if (awaitLatestDatabasePoll()) { return; } ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock(); lock.lock(); try { if (awaitLatestDatabasePoll()) { return; } OnDemandDatabasePoll newOnDemandUpdate = new OnDemandDatabasePoll(); this.latestDatabasePoll = newOnDemandUpdate; doOnDemandPoll(newOnDemandUpdate); } finally { lock.unlock(); } } /** * If the latest {@link DatabasePoll} is a {@link PeriodicDatabasePoll}, or an {@link OnDemandDatabasePoll} that is * made not longer than {@link #periodicPollDelay} from now, awaits for it and returns true; returns false otherwise, * meaning that a new on-demand database poll should be initiated. */ private boolean awaitLatestDatabasePoll() { DatabasePoll latestDatabasePoll = this.latestDatabasePoll; if (latestDatabasePoll instanceof PeriodicDatabasePoll) { Futures.getUnchecked(((PeriodicDatabasePoll) latestDatabasePoll).firstPollCompletionFuture); return true; } if (latestDatabasePoll instanceof OnDemandDatabasePoll) { long periodicPollDelayNanos = TimeUnit.MILLISECONDS.toNanos(periodicPollDelay.getMillis()); OnDemandDatabasePoll latestOnDemandPoll = (OnDemandDatabasePoll) latestDatabasePoll; boolean latestUpdateIsFresh = latestOnDemandPoll.nanosElapsedFromInitiation() < periodicPollDelayNanos; if (latestUpdateIsFresh) { Futures.getUnchecked(latestOnDemandPoll.pollCompletionFuture); return true; } // Latest on-demand update is not fresh. Fall through to return false from this method. } else { assert latestDatabasePoll == null; // No periodic updates and no on-demand database poll have been done yet, nothing to await for. 
} return false; } private void doOnDemandPoll(OnDemandDatabasePoll onDemandPoll) { try { poll(); onDemandPoll.pollCompletionFuture.complete(null); } catch (Throwable t) { onDemandPoll.pollCompletionFuture.completeExceptionally(t); throw t; } } @Override public boolean markSegmentAsUsed(final String segmentId) { try { int numUpdatedDatabaseEntries = connector.getDBI() .withHandle((Handle handle) -> handle.createStatement( StringUtils.format("UPDATE %s SET used=true WHERE id = :id", getSegmentsTable())) .bind("id", segmentId).execute()); // Unlike bulk markAsUsed methods: markAsUsedAllNonOvershadowedSegmentsInDataSource(), // markAsUsedNonOvershadowedSegmentsInInterval(), and markAsUsedNonOvershadowedSegments() we don't put the marked // segment into the respective data source, because we don't have it fetched from the database. It's probably not // worth complicating the implementation and making two database queries just to add the segment because it will // be anyway fetched during the next poll(). Segment putting that is done in the bulk markAsUsed methods is a nice // to have thing, but doesn't formally affects the external guarantees of SegmentsMetadata class. return numUpdatedDatabaseEntries > 0; } catch (RuntimeException e) { log.error(e, "Exception marking segment %s as used", segmentId); throw e; } } @Override public int markAsUsedAllNonOvershadowedSegmentsInDataSource(final String dataSource) { return doMarkAsUsedNonOvershadowedSegments(dataSource, null); } @Override public int markAsUsedNonOvershadowedSegmentsInInterval(final String dataSource, final Interval interval) { Preconditions.checkNotNull(interval); return doMarkAsUsedNonOvershadowedSegments(dataSource, interval); } /** * Implementation for both {@link #markAsUsedAllNonOvershadowedSegmentsInDataSource} (if the given interval is null) * and {@link #markAsUsedNonOvershadowedSegmentsInInterval}. 
*/ private int doMarkAsUsedNonOvershadowedSegments(String dataSourceName, @Nullable Interval interval) { List<DataSegment> usedSegmentsOverlappingInterval = new ArrayList<>(); List<DataSegment> unusedSegmentsInInterval = new ArrayList<>(); connector.inReadOnlyTransaction((handle, status) -> { String queryString = StringUtils.format("SELECT used, payload FROM %1$s WHERE dataSource = :dataSource", getSegmentsTable()); if (interval != null) { queryString += StringUtils.format(" AND start < :end AND %1$send%1$s > :start", connector.getQuoteString()); } Query<?> query = handle.createQuery(queryString).setFetchSize(connector.getStreamingFetchSize()) .bind("dataSource", dataSourceName); if (interval != null) { query = query.bind("start", interval.getStart().toString()).bind("end", interval.getEnd().toString()); } query = query.map((int index, ResultSet resultSet, StatementContext context) -> { try { DataSegment segment = jsonMapper.readValue(resultSet.getBytes("payload"), DataSegment.class); if (resultSet.getBoolean("used")) { usedSegmentsOverlappingInterval.add(segment); } else { if (interval == null || interval.contains(segment.getInterval())) { unusedSegmentsInInterval.add(segment); } } return null; } catch (IOException e) { throw new RuntimeException(e); } }); // Consume the query results to ensure usedSegmentsOverlappingInterval and unusedSegmentsInInterval are // populated. 
consume(query.iterator()); return null; }); VersionedIntervalTimeline<String, DataSegment> versionedIntervalTimeline = VersionedIntervalTimeline .forSegments(Iterators.concat(usedSegmentsOverlappingInterval.iterator(), unusedSegmentsInInterval.iterator())); return markNonOvershadowedSegmentsAsUsed(unusedSegmentsInInterval, versionedIntervalTimeline); } private static void consume(Iterator<?> iterator) { while (iterator.hasNext()) { iterator.next(); } } private int markNonOvershadowedSegmentsAsUsed(List<DataSegment> unusedSegments, VersionedIntervalTimeline<String, DataSegment> timeline) { List<String> segmentIdsToMarkAsUsed = new ArrayList<>(); for (DataSegment segment : unusedSegments) { if (timeline.isOvershadowed(segment.getInterval(), segment.getVersion(), segment)) { continue; } segmentIdsToMarkAsUsed.add(segment.getId().toString()); } return markSegmentsAsUsed(segmentIdsToMarkAsUsed); } @Override public int markAsUsedNonOvershadowedSegments(final String dataSource, final Set<String> segmentIds) throws UnknownSegmentIdException { try { Pair<List<DataSegment>, VersionedIntervalTimeline<String, DataSegment>> unusedSegmentsAndTimeline = connector .inReadOnlyTransaction((handle, status) -> { List<DataSegment> unusedSegments = retrieveUnusedSegments(dataSource, segmentIds, handle); List<Interval> unusedSegmentsIntervals = JodaUtils.condenseIntervals( unusedSegments.stream().map(DataSegment::getInterval).collect(Collectors.toList())); Iterator<DataSegment> usedSegmentsOverlappingUnusedSegmentsIntervals = retrieveUsedSegmentsOverlappingIntervals( dataSource, unusedSegmentsIntervals, handle); VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline .forSegments(Iterators.concat(usedSegmentsOverlappingUnusedSegmentsIntervals, unusedSegments.iterator())); return new Pair<>(unusedSegments, timeline); }); List<DataSegment> unusedSegments = unusedSegmentsAndTimeline.lhs; VersionedIntervalTimeline<String, DataSegment> timeline = 
unusedSegmentsAndTimeline.rhs; return markNonOvershadowedSegmentsAsUsed(unusedSegments, timeline); } catch (Exception e) { Throwable rootCause = Throwables.getRootCause(e); if (rootCause instanceof UnknownSegmentIdException) { throw (UnknownSegmentIdException) rootCause; } else { throw e; } } } private List<DataSegment> retrieveUnusedSegments(final String dataSource, final Set<String> segmentIds, final Handle handle) throws UnknownSegmentIdException { List<String> unknownSegmentIds = new ArrayList<>(); List<DataSegment> segments = segmentIds.stream().map(segmentId -> { Iterator<DataSegment> segmentResultIterator = handle .createQuery(StringUtils.format( "SELECT used, payload FROM %1$s WHERE dataSource = :dataSource AND id = :id", getSegmentsTable())) .bind("dataSource", dataSource).bind("id", segmentId) .map((int index, ResultSet resultSet, StatementContext context) -> { try { if (!resultSet.getBoolean("used")) { return jsonMapper.readValue(resultSet.getBytes("payload"), DataSegment.class); } else { // We emit nulls for used segments. They are filtered out below in this method. return null; } } catch (IOException e) { throw new RuntimeException(e); } }).iterator(); if (!segmentResultIterator.hasNext()) { unknownSegmentIds.add(segmentId); return null; } else { @Nullable DataSegment segment = segmentResultIterator.next(); if (segmentResultIterator.hasNext()) { log.error( "There is more than one row corresponding to segment id [%s] in data source [%s] in the database", segmentId, dataSource); } return segment; } }).filter(Objects::nonNull) // Filter nulls corresponding to used segments. 
.collect(Collectors.toList()); if (!unknownSegmentIds.isEmpty()) { throw new UnknownSegmentIdException(unknownSegmentIds); } return segments; } private Iterator<DataSegment> retrieveUsedSegmentsOverlappingIntervals(final String dataSource, final Collection<Interval> intervals, final Handle handle) { return intervals.stream().flatMap(interval -> { Iterable<DataSegment> segmentResultIterable = () -> handle .createQuery(StringUtils.format("SELECT payload FROM %1$s " + "WHERE dataSource = :dataSource AND start < :end AND %2$send%2$s > :start AND used = true", getSegmentsTable(), connector.getQuoteString())) .setFetchSize(connector.getStreamingFetchSize()).bind("dataSource", dataSource) .bind("start", interval.getStart().toString()).bind("end", interval.getEnd().toString()) .map((int index, ResultSet resultSet, StatementContext context) -> { try { return jsonMapper.readValue(resultSet.getBytes("payload"), DataSegment.class); } catch (IOException e) { throw new RuntimeException(e); } }).iterator(); return StreamSupport.stream(segmentResultIterable.spliterator(), false); }).iterator(); } private int markSegmentsAsUsed(final List<String> segmentIds) { if (segmentIds.isEmpty()) { log.info("No segments found to update!"); return 0; } return connector.getDBI().withHandle(handle -> { Batch batch = handle.createBatch(); segmentIds.forEach(segmentId -> batch.add( StringUtils.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segmentId))); int[] segmentChanges = batch.execute(); return computeNumChangedSegments(segmentIds, segmentChanges); }); } @Override public int markAsUnusedAllSegmentsInDataSource(final String dataSource) { try { final int numUpdatedDatabaseEntries = connector.getDBI() .withHandle((Handle handle) -> handle .createStatement(StringUtils.format( "UPDATE %s SET used=false WHERE dataSource = :dataSource", getSegmentsTable())) .bind("dataSource", dataSource).execute()); return numUpdatedDatabaseEntries; } catch (RuntimeException e) { log.error(e, 
"Exception marking all segments as unused in data source [%s]", dataSource); throw e; } } /** * This method does not update {@link #dataSourcesSnapshot}, see the comments in {@link #doPoll()} about * snapshot update. The update of the segment's state will be reflected after the next {@link DatabasePoll}. */ @Override public boolean markSegmentAsUnused(final String segmentId) { try { return markSegmentAsUnusedInDatabase(segmentId); } catch (RuntimeException e) { log.error(e, "Exception marking segment [%s] as unused", segmentId); throw e; } } @Override public int markSegmentsAsUnused(String dataSourceName, Set<String> segmentIds) { if (segmentIds.isEmpty()) { return 0; } final List<String> segmentIdList = new ArrayList<>(segmentIds); try { return connector.getDBI().withHandle(handle -> { Batch batch = handle.createBatch(); segmentIdList.forEach(segmentId -> batch .add(StringUtils.format("UPDATE %s SET used=false WHERE datasource = '%s' AND id = '%s'", getSegmentsTable(), dataSourceName, segmentId))); final int[] segmentChanges = batch.execute(); return computeNumChangedSegments(segmentIdList, segmentChanges); }); } catch (Exception e) { throw new RuntimeException(e); } } @Override public int markAsUnusedSegmentsInInterval(String dataSourceName, Interval interval) { try { Integer numUpdatedDatabaseEntries = connector.getDBI() .withHandle(handle -> handle .createStatement(StringUtils.format( "UPDATE %s SET used=false WHERE datasource = :datasource " + "AND start >= :start AND %2$send%2$s <= :end", getSegmentsTable(), connector.getQuoteString())) .bind("datasource", dataSourceName).bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()).execute()); return numUpdatedDatabaseEntries; } catch (Exception e) { throw new RuntimeException(e); } } private boolean markSegmentAsUnusedInDatabase(String segmentId) { final int numUpdatedRows = connector.getDBI().withHandle(handle -> handle .createStatement( StringUtils.format("UPDATE %s SET 
used=false WHERE id = :segmentID", getSegmentsTable()))
            .bind("segmentID", segmentId).execute());
    if (numUpdatedRows < 0) {
      log.assertionError("Negative number of rows updated for segment id [%s]: %d", segmentId, numUpdatedRows);
    } else if (numUpdatedRows > 1) {
      log.error(
          "More than one row updated for segment id [%s]: %d, "
          + "there may be more than one row for the segment id in the database",
          segmentId,
          numUpdatedRows
      );
    }
    return numUpdatedRows > 0;
  }

  /**
   * Counts how many entries of {@code segmentChanges} report at least one updated row, logging anomalies on the way.
   *
   * @param segmentIds     segment ids, indexed in parallel with {@code segmentChanges}; only read when an anomaly
   *                       has to be logged
   * @param segmentChanges per-segment updated row counts
   * @return the number of entries in {@code segmentChanges} that are greater than zero
   */
  private static int computeNumChangedSegments(List<String> segmentIds, int[] segmentChanges)
  {
    int numChangedSegments = 0;
    for (int i = 0; i < segmentChanges.length; i++) {
      int numUpdatedRows = segmentChanges[i];
      if (numUpdatedRows < 0) {
        log.assertionError(
            "Negative number of rows updated for segment id [%s]: %d",
            segmentIds.get(i),
            numUpdatedRows
        );
      } else if (numUpdatedRows > 1) {
        log.error(
            "More than one row updated for segment id [%s]: %d, "
            + "there may be more than one row for the segment id in the database",
            segmentIds.get(i),
            numUpdatedRows
        );
      }
      // A count above one is logged as an error above, but still counts as a single changed segment.
      if (numUpdatedRows > 0) {
        numChangedSegments += 1;
      }
    }
    return numChangedSegments;
  }

  /**
   * Looks up the given data source in the snapshot returned by {@link #getSnapshotOfDataSourcesWithAllUsedSegments()}.
   *
   * @param dataSourceName name of the data source to look up
   * @return whatever the snapshot's {@code getDataSource()} returns for that name; declared nullable
   */
  @Override
  public @Nullable ImmutableDruidDataSource getImmutableDataSourceWithUsedSegments(String dataSourceName)
  {
    return getSnapshotOfDataSourcesWithAllUsedSegments().getDataSource(dataSourceName);
  }

  /**
   * Returns the data sources held by the snapshot returned by {@link #getSnapshotOfDataSourcesWithAllUsedSegments()}.
   */
  @Override
  public Collection<ImmutableDruidDataSource> getImmutableDataSourcesWithAllUsedSegments()
  {
    return getSnapshotOfDataSourcesWithAllUsedSegments().getDataSourcesWithAllUsedSegments();
  }

  /**
   * Returns the overshadowed segments reported by the snapshot returned by
   * {@link #getSnapshotOfDataSourcesWithAllUsedSegments()}.
   */
  @Override
  public Set<SegmentId> getOvershadowedSegments()
  {
    return getSnapshotOfDataSourcesWithAllUsedSegments().getOvershadowedSegments();
  }

  /**
   * Calls {@code awaitOrPerformDatabasePoll()} and then returns the current value of the {@code dataSourcesSnapshot}
   * field.
   */
  @Override
  public DataSourcesSnapshot getSnapshotOfDataSourcesWithAllUsedSegments()
  {
    awaitOrPerformDatabasePoll();
    return dataSourcesSnapshot;
  }

  /**
   * Returns an {@link Iterable} over the segments of all data sources in the snapshot.
   *
   * Note that the returned Iterable reads the {@code dataSourcesSnapshot} field each time {@code iterator()} is
   * called on it, not when this method returns, so separate iterations may observe different snapshots.
   */
  @Override
  public Iterable<DataSegment> iterateAllUsedSegments()
  {
    awaitOrPerformDatabasePoll();
    return () -> dataSourcesSnapshot
        .getDataSourcesWithAllUsedSegments()
        .stream()
        .flatMap(dataSource -> dataSource.getSegments().stream())
        .iterator();
  }

  /**
   * Queries the segments table for the distinct values of the {@code datasource} column and collects them into a
   * list. The query has no filter on the {@code used} column.
   */
  @Override
  public Collection<String> retrieveAllDataSourceNames()
  {
    return connector.getDBI().withHandle(
        handle -> handle
            .createQuery(StringUtils.format("SELECT DISTINCT(datasource) FROM %s", getSegmentsTable()))
            .fold(
                new ArrayList<>(),
                (List<String> druidDataSources,
                 Map<String, Object> stringObjectMap,
                 FoldController foldController,
                 StatementContext statementContext) -> {
                  druidDataSources.add(MapUtils.getString(stringObjectMap, "datasource"));
                  return druidDataSources;
                }
            )
    );
  }

  /**
   * Runs {@link #doPoll()} while holding {@code pollLock}.
   */
  @Override
  public void poll()
  {
    // See the comment to the pollLock field, explaining this synchronized block
    synchronized (pollLock) {
      doPoll();
    }
  }

  /**
   * Reads the payloads of all rows with {@code used=true} from the segments table, deserializes them into
   * {@link DataSegment} objects and replaces {@code dataSourcesSnapshot} with a snapshot built from them. Rows whose
   * payload cannot be deserialized raise an alert and are left out of the new snapshot.
   *
   * This method is extracted from {@link #poll()} solely to reduce code nesting.
   */
  @GuardedBy("pollLock")
  private void doPoll()
  {
    log.debug("Starting polling of segment table");

    // some databases such as PostgreSQL require auto-commit turned off
    // to stream results back, enabling transactions disables auto-commit
    //
    // setting connection to read-only will allow some database such as MySQL
    // to automatically use read-only transaction mode, further optimizing the query
    final List<DataSegment> segments = connector.inReadOnlyTransaction(
        new TransactionCallback<List<DataSegment>>()
        {
          @Override
          public List<DataSegment> inTransaction(Handle handle, TransactionStatus status)
          {
            return handle
                .createQuery(StringUtils.format("SELECT payload FROM %s WHERE used=true", getSegmentsTable()))
                .setFetchSize(connector.getStreamingFetchSize())
                .map(
                    new ResultSetMapper<DataSegment>()
                    {
                      @Override
                      public DataSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException
                      {
                        try {
                          DataSegment segment = jsonMapper.readValue(r.getBytes("payload"), DataSegment.class);
                          return replaceWithExistingSegmentIfPresent(segment);
                        }
                        catch (IOException e) {
                          log.makeAlert(e, "Failed to read segment from db.").emit();
                          // If one entry in database is corrupted doPoll() should continue to work overall. See
                          // filter by `Objects::nonNull` below in this method.
                          return null;
                        }
                      }
                    }
                )
                .list();
          }
        }
    );

    Preconditions.checkNotNull(
        segments,
        "Unexpected 'null' when polling segments from the db, aborting snapshot update."
    );

    // dataSourcesSnapshot is updated only here and the DataSourcesSnapshot object is immutable. If data sources or
    // segments are marked as used or unused directly (via markAs...() methods in MetadataSegmentManager), the
    // dataSourcesSnapshot can become invalid until the next database poll.
    // DataSourcesSnapshot computes the overshadowed segments, which makes it an expensive operation if the
    // snapshot was invalidated on each segment mark as unused or used, especially if a user issues a lot of single
    // segment mark calls in rapid succession. So the snapshot update is not done outside of database poll at this time.
    // Updates outside of database polls were primarily for the user experience, so users would immediately see the
    // effect of a segment mark call reflected in MetadataResource API calls.

    ImmutableMap<String, String> dataSourceProperties = createDefaultDataSourceProperties();
    if (segments.isEmpty()) {
      log.info("No segments found in the database!");
    } else {
      // Note: this count still includes the null placeholders of corrupted entries, which are filtered out below.
      log.info("Polled and found %,d segments in the database", segments.size());
    }
    dataSourcesSnapshot = DataSourcesSnapshot.fromUsedSegments(
        // Filter corrupted entries (see above in this method).
        Iterables.filter(segments, Objects::nonNull),
        dataSourceProperties
    );
  }

  /**
   * Builds the property map passed to the snapshot in {@link #doPoll()}: a single {@code "created"} entry holding
   * the current UTC time as a string.
   */
  private static ImmutableMap<String, String> createDefaultDataSourceProperties()
  {
    return ImmutableMap.of("created", DateTimes.nowUtc().toString());
  }

  /**
   * For the garbage collector in Java, it's better to keep new objects short-living, but once they are old enough
   * (i. e. promoted to old generation), try to keep them alive. In {@link #poll()}, we fetch and deserialize all
   * existing segments each time, and then replace them in {@link #dataSourcesSnapshot}. This method allows to use
   * already existing (old) segments when possible, effectively interning them a-la {@link String#intern} or {@link
   * com.google.common.collect.Interner}, aiming to make the majority of {@link DataSegment} objects garbage soon after
   * they are deserialized and to die in young generation. It allows to avoid fragmentation of the old generation and
   * full GCs.
   *
   * @param segment a freshly deserialized segment
   * @return the segment with the same id from the current snapshot if there is one, otherwise {@code segment} itself
   */
  private DataSegment replaceWithExistingSegmentIfPresent(DataSegment segment)
  {
    // Read the field once into a local so that the null check and the lookup below use the same snapshot object.
    @MonotonicNonNull DataSourcesSnapshot dataSourcesSnapshot = this.dataSourcesSnapshot;
    if (dataSourcesSnapshot == null) {
      return segment;
    }
    @Nullable ImmutableDruidDataSource dataSource = dataSourcesSnapshot.getDataSource(segment.getDataSource());
    if (dataSource == null) {
      return segment;
    }
    DataSegment alreadyExistingSegment = dataSource.getSegment(segment.getId());
    return alreadyExistingSegment != null ? alreadyExistingSegment : segment;
  }

  /**
   * Returns the name of the segments table, as provided by {@code dbTables}.
   */
  private String getSegmentsTable()
  {
    return dbTables.get().getSegmentsTable();
  }

  /**
   * Returns intervals of rows in the segments table that belong to the given data source, have {@code used = false}
   * and whose {@code end} column is not greater than {@code maxEndTime}, ordered by start and then by end.
   *
   * @param dataSource value matched against the {@code dataSource} column
   * @param maxEndTime upper bound for the {@code end} column; bound into the query as {@code maxEndTime.toString()}
   * @param limit      maximum number of intervals to return; also used as the initial capacity of the result list
   * @return at most {@code limit} intervals, in query order
   */
  @Override
  public List<Interval> getUnusedSegmentIntervals(final String dataSource, final DateTime maxEndTime, final int limit)
  {
    return connector.inReadOnlyTransaction(
        new TransactionCallback<List<Interval>>()
        {
          @Override
          public List<Interval> inTransaction(Handle handle, TransactionStatus status)
          {
            // The "end" column name is wrapped in the connector's quote string (%2$s) wherever it appears.
            Iterator<Interval> iter = handle
                .createQuery(
                    StringUtils.format(
                        "SELECT start, %2$send%2$s FROM %1$s WHERE dataSource = :dataSource AND "
                        + "%2$send%2$s <= :end AND used = false ORDER BY start, %2$send%2$s",
                        getSegmentsTable(),
                        connector.getQuoteString()
                    )
                )
                .setFetchSize(connector.getStreamingFetchSize())
                .setMaxRows(limit)
                .bind("dataSource", dataSource)
                .bind("end", maxEndTime.toString())
                .map(
                    new BaseResultSetMapper<Interval>()
                    {
                      @Override
                      protected Interval mapInternal(int index, Map<String, Object> row)
                      {
                        return new Interval(
                            DateTimes.of((String) row.get("start")),
                            DateTimes.of((String) row.get("end"))
                        );
                      }
                    }
                )
                .iterator();

            List<Interval> result = Lists.newArrayListWithCapacity(limit);
            // The loop is bounded by limit in addition to setMaxRows(limit) on the query above.
            for (int i = 0; i < limit && iter.hasNext(); i++) {
              try {
                result.add(iter.next());
              }
              catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
            return result;
          }
        }
    );
  }
}