org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.druid.io;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.calcite.adapter.druid.DruidDateTimeUtils;
import org.apache.calcite.adapter.druid.DruidTable;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.DruidStorageHandler;
import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;
import org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidWritable;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.joda.time.Interval;
import org.joda.time.Period;
import org.joda.time.chrono.ISOChronology;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.common.collect.Lists;
import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.HttpClientConfig;
import com.metamx.http.client.HttpClientInit;
import com.metamx.http.client.Request;

import io.druid.query.BaseQuery;
import io.druid.query.Druids;
import io.druid.query.Druids.SegmentMetadataQueryBuilder;
import io.druid.query.Druids.SelectQueryBuilder;
import io.druid.query.Druids.TimeBoundaryQueryBuilder;
import io.druid.query.LocatedSegmentDescriptor;
import io.druid.query.Query;
import io.druid.query.Result;
import io.druid.query.SegmentDescriptor;
import io.druid.query.metadata.metadata.SegmentAnalysis;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.query.select.PagingSpec;
import io.druid.query.select.SelectQuery;
import io.druid.query.spec.MultipleIntervalSegmentSpec;
import io.druid.query.spec.MultipleSpecificSegmentSpec;
import io.druid.query.timeboundary.TimeBoundaryQuery;
import io.druid.query.timeboundary.TimeBoundaryResultValue;

/**
 * Druid query based input format.
 *
 * Given a Druid query and the broker address, it creates the input splits,
 * sends the query (or the per-split sub-queries) to Druid, and retrieves
 * and parses the results.
 */
public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidWritable>
        implements org.apache.hadoop.mapred.InputFormat<NullWritable, DruidWritable> {

    protected static final Logger LOG = LoggerFactory.getLogger(DruidQueryBasedInputFormat.class);

    @Override
    public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
        return getInputSplits(job);
    }

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        return Arrays.<InputSplit>asList(getInputSplits(context.getConfiguration()));
    }

    @SuppressWarnings("deprecation")
    private HiveDruidSplit[] getInputSplits(Configuration conf) throws IOException {
        String address = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
        if (StringUtils.isEmpty(address)) {
            throw new IOException("Druid broker address not specified in configuration");
        }
        String druidQuery = StringEscapeUtils.unescapeJava(conf.get(Constants.DRUID_QUERY_JSON));
        String druidQueryType;
        if (StringUtils.isEmpty(druidQuery)) {
            // Empty, maybe because CBO did not run; we fall back to
            // full Select query
            if (LOG.isWarnEnabled()) {
                LOG.warn("Druid query is empty; creating Select query");
            }
            String dataSource = conf.get(Constants.DRUID_DATA_SOURCE);
            if (dataSource == null) {
                throw new IOException("Druid data source cannot be empty");
            }
            druidQuery = createSelectStarQuery(dataSource);
            druidQueryType = Query.SELECT;
        } else {
            druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
            if (druidQueryType == null) {
                throw new IOException("Druid query type not recognized");
            }
        }

        // Hive depends on FileSplits, so we need the table input path to attach to each split
        Job job = new Job(conf);
        JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
        Path[] paths = FileInputFormat.getInputPaths(jobContext);

        // We need to deserialize and re-serialize the query so that the intervals
        // are written in the JSON Druid query with the user timezone, which is the
        // default Hive time semantics. Then, we create the splits with the resulting
        // Druid queries.
        switch (druidQueryType) {
        case Query.TIMESERIES:
        case Query.TOPN:
        case Query.GROUP_BY:
            return new HiveDruidSplit[] {
                    new HiveDruidSplit(deserializeSerialize(druidQuery), paths[0], new String[] { address }) };
        case Query.SELECT:
            SelectQuery selectQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, SelectQuery.class);
            boolean distributed = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_DRUID_SELECT_DISTRIBUTE);
            if (distributed) {
                return distributeSelectQuery(conf, address, selectQuery, paths[0]);
            } else {
                return splitSelectQuery(conf, address, selectQuery, paths[0]);
            }
        default:
            throw new IOException("Druid query type not recognized");
        }
    }

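    /* Builds the fallback select-star query that is used when the optimizer did not
     * produce a Druid query: it scans the data source over the default interval and
     * sets the fetch context flag to false, so the query can still be split or
     * distributed afterwards. */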
    private static String createSelectStarQuery(String dataSource) throws IOException {
        // Create Select query
        SelectQueryBuilder builder = new Druids.SelectQueryBuilder();
        builder.dataSource(dataSource);
        // Scan the whole data source time range by default
        final List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
        builder.intervals(intervals);
        builder.pagingSpec(PagingSpec.newSpec(1));
        Map<String, Object> context = new HashMap<>();
        context.put(Constants.DRUID_QUERY_FETCH, false);
        builder.context(context);
        return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(builder.build());
    }

    /* Distributes the Select query by creating one split per Druid segment; each
     * split records the Druid nodes that hold the data for the corresponding
     * segment of the given query. */
    private static HiveDruidSplit[] distributeSelectQuery(Configuration conf, String address, SelectQuery query,
            Path dummyPath) throws IOException {
        // If the query has a limit (fetch context flag), we keep it as is and do not distribute it
        final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
        if (isFetch) {
            return new HiveDruidSplit[] {
                    new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
                            new String[] { address }) };
        }

        final String intervals = StringUtils.join(query.getIntervals(), ","); // Comma-separated intervals without brackets
        final String request = String.format("http://%s/druid/v2/datasources/%s/candidates?intervals=%s", address,
                query.getDataSource().getNames().get(0), URLEncoder.encode(intervals, "UTF-8"));
        final InputStream response;
        try {
            response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
                    new Request(HttpMethod.GET, new URL(request)));
        } catch (Exception e) {
            throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
        }

        // Retrieve results
        final List<LocatedSegmentDescriptor> segmentDescriptors;
        try {
            segmentDescriptors = DruidStorageHandlerUtils.JSON_MAPPER.readValue(response,
                    new TypeReference<List<LocatedSegmentDescriptor>>() {
                    });
        } catch (Exception e) {
            response.close();
            throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
        }

        // Create one input split for each segment
        final int numSplits = segmentDescriptors.size();
        final HiveDruidSplit[] splits = new HiveDruidSplit[segmentDescriptors.size()];
        for (int i = 0; i < numSplits; i++) {
            final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
            final String[] hosts = new String[locatedSD.getLocations().size()];
            for (int j = 0; j < locatedSD.getLocations().size(); j++) {
                hosts[j] = locatedSD.getLocations().get(j).getHost();
            }
            // Create partial Select query
            final SegmentDescriptor newSD = new SegmentDescriptor(locatedSD.getInterval(), locatedSD.getVersion(),
                    locatedSD.getPartitionNumber());
            final SelectQuery partialQuery = query
                    .withQuerySegmentSpec(new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
            splits[i] = new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
                    dummyPath, hosts);
        }
        return splits;
    }

    /* Splits the Select query into time intervals according to the configured
     * threshold so that reads can be parallelized. Only the Druid broker is
     * contacted to obtain the results. */
    private static HiveDruidSplit[] splitSelectQuery(Configuration conf, String address, SelectQuery query,
            Path dummyPath) throws IOException {
        final int selectThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_DRUID_SELECT_THRESHOLD);

        final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
        if (isFetch) {
            // If the query has a limit (fetch context flag), we keep it as is and do not split it
            return new HiveDruidSplit[] {
                    new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
                            new String[] { address }) };
        }

        // We do not know the number of rows, so we execute a Segment Metadata
        // query to obtain it
        SegmentMetadataQueryBuilder metadataBuilder = new Druids.SegmentMetadataQueryBuilder();
        metadataBuilder.dataSource(query.getDataSource());
        metadataBuilder.intervals(query.getIntervals());
        metadataBuilder.merge(true);
        metadataBuilder.analysisTypes();
        SegmentMetadataQuery metadataQuery = metadataBuilder.build();
        InputStream response;
        try {
            response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
                    DruidStorageHandlerUtils.createRequest(address, metadataQuery));
        } catch (Exception e) {
            throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
        }

        // Retrieve results
        List<SegmentAnalysis> metadataList;
        try {
            metadataList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response,
                    new TypeReference<List<SegmentAnalysis>>() {
                    });
        } catch (Exception e) {
            response.close();
            throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
        }
        if (metadataList == null) {
            throw new IOException("Connected to Druid but could not retrieve datasource information");
        }
        if (metadataList.isEmpty()) {
            // There are no rows for that time range; we can submit the query as is
            return new HiveDruidSplit[] {
                    new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
                            new String[] { address }) };
        }
        if (metadataList.size() != 1) {
            throw new IOException("Information about segments should have been merged");
        }

        final long numRows = metadataList.get(0).getNumRows();

        query = query.withPagingSpec(PagingSpec.newSpec(Integer.MAX_VALUE));
        if (numRows <= selectThreshold) {
            // We are not going to split it
            return new HiveDruidSplit[] {
                    new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
                            new String[] { address }) };
        }

        // If the query does not restrict the time range (i.e. it uses the default
        // interval), we obtain the actual data time range using a Time Boundary query.
        // Then, we use that information to split the query according to the Select
        // threshold configuration property
        final List<Interval> intervals = new ArrayList<>();
        if (query.getIntervals().size() == 1 && query.getIntervals().get(0)
                .withChronology(ISOChronology.getInstanceUTC()).equals(DruidTable.DEFAULT_INTERVAL)) {
            // Default max and min, we should execute a time boundary query to get a
            // more precise range
            TimeBoundaryQueryBuilder timeBuilder = new Druids.TimeBoundaryQueryBuilder();
            timeBuilder.dataSource(query.getDataSource());
            TimeBoundaryQuery timeQuery = timeBuilder.build();
            try {
                response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
                        DruidStorageHandlerUtils.createRequest(address, timeQuery));
            } catch (Exception e) {
                throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
            }

            // Retrieve results
            List<Result<TimeBoundaryResultValue>> timeList;
            try {
                timeList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response,
                        new TypeReference<List<Result<TimeBoundaryResultValue>>>() {
                        });
            } catch (Exception e) {
                response.close();
                throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
            }
            if (timeList == null || timeList.isEmpty()) {
                throw new IOException("Connected to Druid but could not retrieve time boundary information");
            }
            if (timeList.size() != 1) {
                throw new IOException("We should obtain a single time boundary");
            }

            intervals.add(new Interval(timeList.get(0).getValue().getMinTime().getMillis(),
                    timeList.get(0).getValue().getMaxTime().getMillis(), ISOChronology.getInstanceUTC()));
        } else {
            intervals.addAll(query.getIntervals());
        }

        // Create ceil(numRows / selectThreshold) input splits
        int numSplits = (int) Math.ceil((double) numRows / selectThreshold);
        List<List<Interval>> newIntervals = createSplitsIntervals(intervals, numSplits);
        HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
        for (int i = 0; i < numSplits; i++) {
            // Create partial Select query
            final SelectQuery partialQuery = query
                    .withQuerySegmentSpec(new MultipleIntervalSegmentSpec(newIntervals.get(i)));
            splits[i] = new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
                    dummyPath, new String[] { address });
        }
        return splits;
    }

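    /* Partitions the total time covered by the given intervals into numSplits
     * contiguous chunks of (roughly) equal duration, preserving the original interval
     * boundaries. For instance, a single 10-day interval split into 5 pieces yields
     * five consecutive 2-day intervals. */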
    private static List<List<Interval>> createSplitsIntervals(List<Interval> intervals, int numSplits) {

        long startTime = intervals.get(0).getStartMillis();
        long endTime = startTime;
        long currTime = 0;
        List<List<Interval>> newIntervals = new ArrayList<>();
        long totalTime = 0;
        for (Interval interval : intervals) {
            totalTime += interval.getEndMillis() - interval.getStartMillis();
        }
        for (int i = 0, posIntervals = 0; i < numSplits; i++) {
            final long rangeSize = Math.round((double) (totalTime * (i + 1)) / numSplits)
                    - Math.round((double) (totalTime * i) / numSplits);
            // Create the new interval(s)
            List<Interval> currentIntervals = new ArrayList<>();
            while (posIntervals < intervals.size()) {
                final Interval interval = intervals.get(posIntervals);
                final long expectedRange = rangeSize - currTime;
                if (interval.getEndMillis() - startTime >= expectedRange) {
                    endTime = startTime + expectedRange;
                    currentIntervals.add(new Interval(startTime, endTime, ISOChronology.getInstanceUTC()));
                    startTime = endTime;
                    currTime = 0;
                    break;
                }
                endTime = interval.getEndMillis();
                currentIntervals.add(new Interval(startTime, endTime, ISOChronology.getInstanceUTC()));
                currTime += (endTime - startTime);
                startTime = intervals.get(++posIntervals).getStartMillis();
            }
            newIntervals.add(currentIntervals);
        }
        assert endTime == intervals.get(intervals.size() - 1).getEndMillis();
        return newIntervals;
    }

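    /* Round-trips the query JSON through Druid's query classes so that the intervals
     * are re-serialized with the user timezone (the default Hive time semantics)
     * before the query is embedded into the splits. */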
    private static String deserializeSerialize(String druidQuery)
            throws JsonParseException, JsonMappingException, IOException {
        BaseQuery<?> deserializedQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery,
                BaseQuery.class);
        return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(deserializedQuery);
    }

    @Override
    public org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> getRecordReader(
            org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) throws IOException {
        // We need to provide a different record reader for every type of Druid query,
        // because the format of the Druid results differs for each query type.
        final DruidQueryRecordReader<?, ?> reader;
        final String druidQueryType = job.get(Constants.DRUID_QUERY_TYPE);
        if (druidQueryType == null) {
            reader = new DruidSelectQueryRecordReader(); // By default
            reader.initialize((HiveDruidSplit) split, job);
            return reader;
        }
        switch (druidQueryType) {
        case Query.TIMESERIES:
            reader = new DruidTimeseriesQueryRecordReader();
            break;
        case Query.TOPN:
            reader = new DruidTopNQueryRecordReader();
            break;
        case Query.GROUP_BY:
            reader = new DruidGroupByQueryRecordReader();
            break;
        case Query.SELECT:
            reader = new DruidSelectQueryRecordReader();
            break;
        default:
            throw new IOException("Druid query type not recognized");
        }
        reader.initialize((HiveDruidSplit) split, job);
        return reader;
    }

    @Override
    public RecordReader<NullWritable, DruidWritable> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        // We need to provide a different record reader for every type of Druid query,
        // because the format of the Druid results differs for each query type.
        final String druidQueryType = context.getConfiguration().get(Constants.DRUID_QUERY_TYPE);
        if (druidQueryType == null) {
            return new DruidSelectQueryRecordReader(); // By default
        }
        final DruidQueryRecordReader<?, ?> reader;
        switch (druidQueryType) {
        case Query.TIMESERIES:
            reader = new DruidTimeseriesQueryRecordReader();
            break;
        case Query.TOPN:
            reader = new DruidTopNQueryRecordReader();
            break;
        case Query.GROUP_BY:
            reader = new DruidGroupByQueryRecordReader();
            break;
        case Query.SELECT:
            reader = new DruidSelectQueryRecordReader();
            break;
        default:
            throw new IOException("Druid query type not recognized");
        }
        return reader;
    }

}
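
Example

Below is a minimal, hypothetical sketch of driving this input format directly through the
mapred API. Only the configuration keys (the broker address variable, Constants.DRUID_QUERY_TYPE
and Constants.DRUID_QUERY_JSON) and the call sequence come from the listing above; the class name
DruidInputFormatExample, the broker address, the dummy input path and the JSON query are
illustrative placeholders, and a reachable Druid broker is assumed at read time.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat;
import org.apache.hadoop.hive.druid.serde.DruidWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class DruidInputFormatExample {

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Broker address and query; all values below are placeholders
        conf.set(HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS.varname, "localhost:8082");
        conf.set(Constants.DRUID_QUERY_TYPE, "timeseries");
        conf.set(Constants.DRUID_QUERY_JSON,
                "{\"queryType\":\"timeseries\",\"dataSource\":\"wikipedia\","
                        + "\"granularity\":\"all\",\"intervals\":[\"2010-01-01/2020-01-01\"],"
                        + "\"aggregations\":[{\"type\":\"count\",\"name\":\"rows\"}]}");
        // The input format expects at least one input path to attach to its splits
        FileInputFormat.setInputPaths(conf, new Path("/tmp/druid-dummy"));

        DruidQueryBasedInputFormat inputFormat = new DruidQueryBasedInputFormat();
        InputSplit[] splits = inputFormat.getSplits(conf, 1);
        for (InputSplit split : splits) {
            RecordReader<NullWritable, DruidWritable> reader =
                    inputFormat.getRecordReader(split, conf, Reporter.NULL);
            NullWritable key = reader.createKey();
            DruidWritable value = reader.createValue();
            while (reader.next(key, value)) {
                System.out.println(value);
            }
            reader.close();
        }
    }
}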