org.apache.druid.indexing.common.task.RealtimeIndexTask.java — source code listing

Java tutorial

Introduction

Below is the complete source code of the class org.apache.druid.indexing.common.task.RealtimeIndexTask, taken from the Apache Druid project.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexing.common.task;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.io.FileUtils;
import org.apache.druid.data.input.Committer;
import org.apache.druid.data.input.Firehose;
import org.apache.druid.data.input.FirehoseFactory;
import org.apache.druid.discovery.DiscoveryDruidNode;
import org.apache.druid.discovery.DruidNodeDiscoveryProvider;
import org.apache.druid.discovery.LookupNodeService;
import org.apache.druid.indexer.TaskStatus;
import org.apache.druid.indexing.common.TaskLock;
import org.apache.druid.indexing.common.TaskLockType;
import org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.indexing.common.actions.LockAcquireAction;
import org.apache.druid.indexing.common.actions.LockReleaseAction;
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.guava.CloseQuietly;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.query.FinalizeResultsQueryRunner;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QueryRunnerFactory;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.RealtimeIOConfig;
import org.apache.druid.segment.indexing.RealtimeTuningConfig;
import org.apache.druid.segment.realtime.FireDepartment;
import org.apache.druid.segment.realtime.FireDepartmentMetrics;
import org.apache.druid.segment.realtime.RealtimeMetricsMonitor;
import org.apache.druid.segment.realtime.SegmentPublisher;
import org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory;
import org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory;
import org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
import org.apache.druid.segment.realtime.plumber.Committers;
import org.apache.druid.segment.realtime.plumber.Plumber;
import org.apache.druid.segment.realtime.plumber.PlumberSchool;
import org.apache.druid.segment.realtime.plumber.Plumbers;
import org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool;
import org.apache.druid.segment.realtime.plumber.VersioningPolicy;
import org.apache.druid.server.coordination.DataSegmentAnnouncer;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadLocalRandom;

/**
 * Task that ingests events from a {@link Firehose} in real time, hands rows to a {@link Plumber} for persistence,
 * and publishes the resulting segments. Exclusive interval locks are acquired as segments are announced and
 * released as they are unannounced (i.e. handed off).
 *
 * Thread-safety note: {@link #run} executes on the task runner thread while {@link #stopGracefully} may be called
 * concurrently; the two coordinate via {@code synchronized (this)} and the volatile flags below.
 */
public class RealtimeIndexTask extends AbstractTask {
    public static final String CTX_KEY_LOOKUP_TIER = "lookupTier";

    private static final EmittingLogger log = new EmittingLogger(RealtimeIndexTask.class);

    // Each suffix symbol encodes TASK_ID_BITS_PER_SYMBOL bits as one character in 'a'..'p'.
    private static final int TASK_ID_BITS_PER_SYMBOL = 4;
    private static final int TASK_ID_SYMBOL_MASK = (1 << TASK_ID_BITS_PER_SYMBOL) - 1;
    // FIX: use Integer.SIZE (32 bits), not Integer.BYTES (4 bytes). The previous expression
    // (Integer.BYTES / TASK_ID_BITS_PER_SYMBOL) evaluated to 1, producing a one-character suffix that
    // discarded 28 of the 32 random bits and made collisions between auto-generated task ids very likely.
    // Integer.SIZE / TASK_ID_BITS_PER_SYMBOL == 8, so all 32 random bits are encoded.
    private static final int TASK_ID_LENGTH = Integer.SIZE / TASK_ID_BITS_PER_SYMBOL;

    /**
     * Builds a random 8-character suffix (characters 'a' through 'p'), encoding a full random {@code int}
     * 4 bits per character. Used to de-duplicate auto-generated task ids.
     */
    public static String makeRandomId() {
        final StringBuilder suffix = new StringBuilder(TASK_ID_LENGTH);
        int randomBits = ThreadLocalRandom.current().nextInt();
        for (int i = 0; i < TASK_ID_LENGTH; i++) {
            suffix.append((char) ('a' + ((randomBits >>> (i * TASK_ID_BITS_PER_SYMBOL)) & TASK_ID_SYMBOL_MASK)));
        }
        return suffix.toString();
    }

    /** Derives an id for this task from the spec's datasource, partition number, current time and a random suffix. */
    private static String makeTaskId(FireDepartment fireDepartment) {
        return makeTaskId(fireDepartment.getDataSchema().getDataSource(),
                fireDepartment.getTuningConfig().getShardSpec().getPartitionNum(), DateTimes.nowUtc(),
                makeRandomId());
    }

    // Package-private so tests can build ids with a fixed timestamp/suffix.
    static String makeTaskId(String dataSource, int partitionNumber, DateTime timestamp, String suffix) {
        return StringUtils.format("index_realtime_%s_%d_%s_%s", dataSource, partitionNumber, timestamp, suffix);
    }

    private static String makeDatasource(FireDepartment fireDepartment) {
        return fireDepartment.getDataSchema().getDataSource();
    }

    /** Ingestion spec (schema + IO config + tuning config); serialized as the "spec" JSON property. */
    @JsonIgnore
    private final FireDepartment spec;

    // Volatile runtime state, shared between the run thread and stopGracefully().
    @JsonIgnore
    private volatile Plumber plumber = null;

    @JsonIgnore
    private volatile Firehose firehose = null;

    @JsonIgnore
    private volatile FireDepartmentMetrics metrics = null;

    // Set once by stopGracefully(); checked at several points in run() to decide between draining and finishing.
    @JsonIgnore
    private volatile boolean gracefullyStopped = false;

    // True while plumber.finishJob() is in progress; stopGracefully() interrupts the run thread in that case.
    @JsonIgnore
    private volatile boolean finishingJob = false;

    @JsonIgnore
    private volatile Thread runThread = null;

    @JsonIgnore
    private volatile QueryRunnerFactoryConglomerate queryRunnerFactoryConglomerate = null;

    /**
     * @param id             explicit task id, or null to auto-generate one from the spec
     * @param taskResource   resource requirements for the task runner
     * @param fireDepartment the ingestion spec
     * @param context        task context map (may carry lock timeout, priority, lookup tier overrides)
     */
    @JsonCreator
    public RealtimeIndexTask(@JsonProperty("id") String id, @JsonProperty("resource") TaskResource taskResource,
            @JsonProperty("spec") FireDepartment fireDepartment,
            @JsonProperty("context") Map<String, Object> context) {
        super(id == null ? makeTaskId(fireDepartment) : id,
                StringUtils.format("index_realtime_%s", makeDatasource(fireDepartment)), taskResource,
                makeDatasource(fireDepartment), context);
        this.spec = fireDepartment;
    }

    @Override
    public int getPriority() {
        return getContextValue(Tasks.PRIORITY_KEY, Tasks.DEFAULT_REALTIME_TASK_PRIORITY);
    }

    @Override
    public String getType() {
        return "index_realtime";
    }

    @Override
    public String getNodeType() {
        return "realtime";
    }

    /**
     * Serves queries against the in-memory/persisted data held by the plumber. Returns null until the plumber
     * has been created in {@link #run}, i.e. before the task has started ingesting.
     */
    @Override
    public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
        if (plumber != null) {
            QueryRunnerFactory<T, Query<T>> factory = queryRunnerFactoryConglomerate.findFactory(query);
            QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();

            return new FinalizeResultsQueryRunner<T>(plumber.getQueryRunner(query), toolChest);
        } else {
            return null;
        }
    }

    // Realtime tasks acquire locks lazily as segments are announced, so they are always ready to run.
    @Override
    public boolean isReady(TaskActionClient taskActionClient) {
        return true;
    }

    /**
     * Main ingestion loop: announces this node, connects the firehose, feeds rows to the plumber, and on exit
     * persists (and, unless gracefully stopped, finishes/publishes) before unannouncing.
     *
     * @return a successful status if ingestion completes (or is gracefully stopped) without error
     */
    @Override
    public TaskStatus run(final TaskToolbox toolbox) throws Exception {
        runThread = Thread.currentThread();

        if (this.plumber != null) {
            throw new IllegalStateException("WTF?!? run with non-null plumber??!");
        }

        setupTimeoutAlert();

        boolean normalExit = true;

        // It would be nice to get the PlumberSchool in the constructor.  Although that will need jackson injectables for
        // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been introduced.

        final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(toolbox);

        // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
        // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
        // which will typically be either the main data processing loop or the persist thread.

        // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
        final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
        // Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, http timeout error can occur while waiting for a
        // lock to be acquired.
        final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
            @Override
            public void announceSegment(final DataSegment segment) throws IOException {
                // Side effect: Calling announceSegment causes a lock to be acquired
                Preconditions.checkNotNull(
                        toolbox.getTaskActionClient()
                                .submit(new LockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(),
                                        lockTimeoutMs)),
                        "Cannot acquire a lock for interval[%s]", segment.getInterval());
                toolbox.getSegmentAnnouncer().announceSegment(segment);
            }

            @Override
            public void unannounceSegment(final DataSegment segment) throws IOException {
                try {
                    toolbox.getSegmentAnnouncer().unannounceSegment(segment);
                } finally {
                    // Release the interval lock even if unannouncing failed.
                    toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
                }
            }

            @Override
            public void announceSegments(Iterable<DataSegment> segments) throws IOException {
                // Side effect: Calling announceSegments causes locks to be acquired
                for (DataSegment segment : segments) {
                    Preconditions.checkNotNull(
                            toolbox.getTaskActionClient()
                                    .submit(new LockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(),
                                            lockTimeoutMs)),
                            "Cannot acquire a lock for interval[%s]", segment.getInterval());
                }
                toolbox.getSegmentAnnouncer().announceSegments(segments);
            }

            @Override
            public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
                try {
                    toolbox.getSegmentAnnouncer().unannounceSegments(segments);
                } finally {
                    for (DataSegment segment : segments) {
                        toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
                    }
                }
            }
        };

        // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
        // NOTE: (and thus the firehose)

        // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
        // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
        // the plumber such that waiting for the coordinator doesn't block data processing.
        final VersioningPolicy versioningPolicy = new VersioningPolicy() {
            @Override
            public String getVersion(final Interval interval) {
                try {
                    // Side effect: Calling getVersion causes a lock to be acquired
                    final LockAcquireAction action = new LockAcquireAction(TaskLockType.EXCLUSIVE, interval,
                            lockTimeoutMs);
                    final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(action),
                            "Cannot acquire a lock for interval[%s]", interval);

                    return lock.getVersion();
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        };

        DataSchema dataSchema = spec.getDataSchema();
        RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
        RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir())
                .withVersioningPolicy(versioningPolicy);

        final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
        this.metrics = fireDepartment.getMetrics();
        final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);

        this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();

        // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
        // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
        // NOTE: (partitionNum_index.zip for HDFS data storage) and descriptor.json (partitionNum_descriptor.json for
        // NOTE: HDFS data storage) to mismatch, or it can cause historical nodes to load different instances of
        // NOTE: the "same" segment.
        final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(),
                toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer,
                segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryExecutorService(),
                toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(),
                toolbox.getCachePopulatorStats(), toolbox.getObjectMapper());

        this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);

        Supplier<Committer> committerSupplier = null;
        final File firehoseTempDir = toolbox.getFirehoseTemporaryDir();

        // Allow the task context to override which lookup tier this peon advertises.
        LookupNodeService lookupNodeService = getContextValue(CTX_KEY_LOOKUP_TIER) == null
                ? toolbox.getLookupNodeService()
                : new LookupNodeService((String) getContextValue(CTX_KEY_LOOKUP_TIER));
        DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(),
                DruidNodeDiscoveryProvider.NODE_TYPE_PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(),
                        toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));

        try {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);

            plumber.startJob();

            // Set up metrics emission
            toolbox.getMonitorScheduler().addMonitor(metricsMonitor);

            // Firehose temporary directory is automatically removed when this RealtimeIndexTask completes.
            FileUtils.forceMkdir(firehoseTempDir);

            // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
            final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
            final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);

            // Skip connecting firehose if we've been stopped before we got started.
            synchronized (this) {
                if (!gracefullyStopped) {
                    firehose = firehoseFactory.connect(spec.getDataSchema().getParser(), firehoseTempDir);
                    committerSupplier = Committers.supplierFromFirehose(firehose);
                }
            }

            // Time to read data! Keep draining a closable firehose even after a graceful stop request.
            while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
                Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(),
                        metrics);
            }
        } catch (Throwable e) {
            normalExit = false;
            log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
            throw e;
        } finally {
            if (normalExit) {
                try {
                    // Persist if we had actually started.
                    if (firehose != null) {
                        log.info("Persisting remaining data.");

                        final Committer committer = committerSupplier.get();
                        final CountDownLatch persistLatch = new CountDownLatch(1);
                        plumber.persist(new Committer() {
                            @Override
                            public Object getMetadata() {
                                return committer.getMetadata();
                            }

                            @Override
                            public void run() {
                                try {
                                    committer.run();
                                } finally {
                                    // Always release the latch so the await() below cannot hang forever.
                                    persistLatch.countDown();
                                }
                            }
                        });
                        persistLatch.await();
                    }

                    if (gracefullyStopped) {
                        log.info("Gracefully stopping.");
                    } else {
                        log.info("Finishing the job.");
                        synchronized (this) {
                            if (gracefullyStopped) {
                                // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                                log.info("Gracefully stopping.");
                            } else {
                                finishingJob = true;
                            }
                        }

                        if (finishingJob) {
                            plumber.finishJob();
                        }
                    }
                } catch (InterruptedException e) {
                    // Expected during graceful stop: stopGracefully() interrupts the run thread while finishing.
                    log.debug(e, "Interrupted while finishing the job");
                } catch (Exception e) {
                    log.makeAlert(e, "Failed to finish realtime task").emit();
                    throw e;
                } finally {
                    if (firehose != null) {
                        CloseQuietly.close(firehose);
                    }
                    toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
                }
            }

            toolbox.getDataSegmentServerAnnouncer().unannounce();
            toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
        }

        log.info("Job done!");
        return TaskStatus.success(getId());
    }

    @Override
    public boolean canRestore() {
        return true;
    }

    /**
     * Requests a graceful stop. Depending on progress this either does nothing (not started), drains the
     * firehose by closing it, or interrupts the run thread (when finishing, or when the firehose cannot be
     * drained by closing). Idempotent: only the first call has an effect.
     */
    @Override
    public void stopGracefully() {
        try {
            synchronized (this) {
                if (!gracefullyStopped) {
                    gracefullyStopped = true;
                    if (firehose == null) {
                        log.info("stopGracefully: Firehose not started yet, so nothing to stop.");
                    } else if (finishingJob) {
                        log.info("stopGracefully: Interrupting finishJob.");
                        runThread.interrupt();
                    } else if (isFirehoseDrainableByClosing(spec.getIOConfig().getFirehoseFactory())) {
                        log.info("stopGracefully: Draining firehose.");
                        firehose.close();
                    } else {
                        log.info("stopGracefully: Cannot drain firehose by closing, interrupting run thread.");
                        runThread.interrupt();
                    }
                }
            }
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Public for tests.
     */
    @JsonIgnore
    public Firehose getFirehose() {
        return firehose;
    }

    /**
     * Public for tests.
     */
    @JsonIgnore
    public FireDepartmentMetrics getMetrics() {
        return metrics;
    }

    @JsonProperty("spec")
    public FireDepartment getRealtimeIngestionSchema() {
        return spec;
    }

    /**
     * Is a firehose from this factory drainable by closing it? If so, we should drain on stopGracefully rather than
     * abruptly stopping.
     *
     * This is a hack to get around the fact that the Firehose and FirehoseFactory interfaces do not help us do this.
     *
     * Protected for tests.
     */
    protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
        return firehoseFactory instanceof EventReceiverFirehoseFactory
                || (firehoseFactory instanceof TimedShutoffFirehoseFactory && isFirehoseDrainableByClosing(
                        ((TimedShutoffFirehoseFactory) firehoseFactory).getDelegateFactory()))
                || (firehoseFactory instanceof ClippedFirehoseFactory
                        && isFirehoseDrainableByClosing(((ClippedFirehoseFactory) firehoseFactory).getDelegate()));
    }

    /** Publishes segments by submitting them through the task action client. */
    public static class TaskActionSegmentPublisher implements SegmentPublisher {
        final TaskToolbox taskToolbox;

        public TaskActionSegmentPublisher(TaskToolbox taskToolbox) {
            this.taskToolbox = taskToolbox;
        }

        @Override
        public void publishSegment(DataSegment segment) throws IOException {
            taskToolbox.publishSegments(ImmutableList.of(segment));
        }
    }

    /**
     * If an alert timeout is configured, schedules a one-shot daemon timer that emits an alert when the task
     * has not finished within the configured number of milliseconds.
     */
    private void setupTimeoutAlert() {
        if (spec.getTuningConfig().getAlertTimeout() > 0) {
            Timer timer = new Timer("RealtimeIndexTask-Timer", true);
            timer.schedule(new TimerTask() {
                @Override
                public void run() {
                    log.makeAlert(
                            "RealtimeIndexTask for dataSource [%s] hasn't finished in configured time [%d] ms.",
                            spec.getDataSchema().getDataSource(), spec.getTuningConfig().getAlertTimeout()).emit();
                }
            }, spec.getTuningConfig().getAlertTimeout());
        }
    }
}