Java tutorial: Druid's YeOldePlumberSchool
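The class below is taken from the Druid indexing service (package com.metamx.druid.indexing.common.index, GPLv2, Copyright 2012 Metamarkets Group Inc.). YeOldePlumberSchool is a PlumberSchool whose plumbers build exactly one historical segment: incoming rows accumulate in a single Sink, each call to persist() spills the in-memory index to a temporary directory, and finishJob() merges the spills, pushes the merged segment through a DataSegmentPusher, and cleans up after itself.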
/*
 * Druid - a distributed column store.
 * Copyright (C) 2012 Metamarkets Group Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

package com.metamx.druid.indexing.common.index;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.metamx.common.logger.Logger;
import com.metamx.druid.Query;
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.index.QueryableIndex;
import com.metamx.druid.index.v1.IndexIO;
import com.metamx.druid.index.v1.IndexMerger;
import com.metamx.druid.loading.DataSegmentPusher;
import com.metamx.druid.query.QueryRunner;
import com.metamx.druid.realtime.FireDepartmentMetrics;
import com.metamx.druid.realtime.FireHydrant;
import com.metamx.druid.realtime.Schema;
import com.metamx.druid.realtime.plumber.Plumber;
import com.metamx.druid.realtime.plumber.PlumberSchool;
import com.metamx.druid.realtime.plumber.Sink;
import org.apache.commons.io.FileUtils;
import org.joda.time.Interval;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Set;

/**
 * Trains plumbers that create a single historical segment.
 */
@JsonTypeName("historical")
public class YeOldePlumberSchool implements PlumberSchool
{
  private static final Logger log = new Logger(YeOldePlumberSchool.class);

  private final Interval interval;
  private final String version;
  private final DataSegmentPusher dataSegmentPusher;
  private final File tmpSegmentDir;

  @JsonCreator
  public YeOldePlumberSchool(
      @JsonProperty("interval") Interval interval,
      @JsonProperty("version") String version,
      @JacksonInject("segmentPusher") DataSegmentPusher dataSegmentPusher,
      @JacksonInject("tmpSegmentDir") File tmpSegmentDir
  )
  {
    this.interval = interval;
    this.version = version;
    this.dataSegmentPusher = dataSegmentPusher;
    this.tmpSegmentDir = tmpSegmentDir;
  }

  @Override
  public Plumber findPlumber(final Schema schema, final FireDepartmentMetrics metrics)
  {
    // There can be only one.
    final Sink theSink = new Sink(interval, schema, version);

    // Temporary directory to hold spilled segments.
    final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());

    // Set of spilled segments. Will be merged at the end.
    final Set<File> spilled = Sets.newHashSet();

    return new Plumber()
    {
      @Override
      public void startJob()
      {
        // Nothing to do; the sink was created eagerly in findPlumber.
      }

      @Override
      public Sink getSink(long timestamp)
      {
        if (theSink.getInterval().contains(timestamp)) {
          return theSink;
        } else {
          return null;
        }
      }

      @Override
      public <T> QueryRunner<T> getQueryRunner(Query<T> query)
      {
        throw new UnsupportedOperationException("Don't query me, bro.");
      }

      @Override
      public void persist(Runnable commitRunnable)
      {
        spillIfSwappable();
        commitRunnable.run();
      }

      @Override
      public void finishJob()
      {
        // The segment we will upload.
        File fileToUpload = null;

        try {
          // User should have persisted everything by now.
          Preconditions.checkState(!theSink.swappable(), "All data must be persisted before finishing the job!");

          if (spilled.size() == 0) {
            throw new IllegalStateException("Nothing indexed?");
          } else if (spilled.size() == 1) {
            fileToUpload = Iterables.getOnlyElement(spilled);
          } else {
            List<QueryableIndex> indexes = Lists.newArrayList();
            for (final File oneSpill : spilled) {
              indexes.add(IndexIO.loadIndex(oneSpill));
            }

            fileToUpload = new File(tmpSegmentDir, "merged");
            IndexMerger.mergeQueryableIndex(indexes, schema.getAggregators(), fileToUpload);
          }

          // Map merged segment so we can extract dimensions.
          final QueryableIndex mappedSegment = IndexIO.loadIndex(fileToUpload);

          final DataSegment segmentToUpload = theSink.getSegment()
                                                     .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions()))
                                                     .withBinaryVersion(IndexIO.getVersionFromDir(fileToUpload));

          dataSegmentPusher.push(fileToUpload, segmentToUpload);

          log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
        }
        catch (Exception e) {
          log.warn(e, "Failed to merge and upload");
          throw Throwables.propagate(e);
        }
        finally {
          try {
            // Clean up the directory we uploaded (a lone spill, or the merged dir).
            if (fileToUpload != null) {
              log.info("Deleting Index File[%s]", fileToUpload);
              FileUtils.deleteDirectory(fileToUpload);
            }
          }
          catch (IOException e) {
            log.warn(e, "Error deleting directory[%s]", fileToUpload);
          }
        }
      }

      private void spillIfSwappable()
      {
        if (theSink.swappable()) {
          final FireHydrant indexToPersist = theSink.swap();
          final int rowsToPersist = indexToPersist.getIndex().size();
          final File dirToPersist = getSpillDir(indexToPersist.getCount());

          log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);

          try {
            IndexMerger.persist(indexToPersist.getIndex(), dirToPersist);

            indexToPersist.swapSegment(null);
            metrics.incrementRowOutputCount(rowsToPersist);

            spilled.add(dirToPersist);
          }
          catch (Exception e) {
            log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
            throw Throwables.propagate(e);
          }
        }
      }

      private File getSpillDir(final int n)
      {
        return new File(persistDir, String.format("spill%d", n));
      }
    };
  }
}
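Read top to bottom, the plumber's lifecycle is startJob(), then routing rows through getSink(timestamp), then persist(), then finishJob(). Below is a minimal sketch of a driving loop for that lifecycle. It is not code from the Druid repository: schema, metrics, segmentPusher, and rows are assumed to be supplied by the surrounding task, and Sink.add(InputRow) and com.metamx.druid.input.InputRow come from the realtime/common modules of the same era rather than from this file.

// A hypothetical driver for YeOldePlumberSchool, for illustration only.
// Needs java.io.File and the com.metamx.druid imports shown above, plus:
import com.metamx.druid.input.InputRow;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class YeOldePlumberExample
{
  public static void buildOneSegment(
      final Schema schema,                     // assumed: dataSource + aggregators
      final FireDepartmentMetrics metrics,     // assumed: counters for row output
      final DataSegmentPusher segmentPusher,   // assumed: e.g. a local or S3 pusher
      final Iterable<InputRow> rows            // assumed: the rows to index
  )
  {
    final PlumberSchool plumberSchool = new YeOldePlumberSchool(
        new Interval("2012-01-01/2012-01-02"), // the single interval this plumber accepts
        new DateTime().toString(),             // version string stamped on the segment
        segmentPusher,
        new File("/tmp/segment-scratch")       // scratch space for spills and the merge
    );

    final Plumber plumber = plumberSchool.findPlumber(schema, metrics);
    plumber.startJob();

    for (final InputRow row : rows) {
      final Sink sink = plumber.getSink(row.getTimestampFromEpoch());
      if (sink != null) {                      // null means the row falls outside the interval
        sink.add(row);
      }
    }

    // Spill whatever is still in memory so finishJob's precondition holds,
    // then merge all spills and push the final segment.
    plumber.persist(new Runnable() { @Override public void run() {} });
    plumber.finishJob();
  }
}

The no-op Runnable stands in for a real commit hook: persist() spills the in-memory index before running it, which is what lets the !theSink.swappable() precondition in finishJob() pass.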