org.apache.hadoop.hbase.client.coprocessor.TimeseriesAggregationClient.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.client.coprocessor.TimeseriesAggregationClient.java

Source

/**
 * Copyright 2014 Julian Wissmann 
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy of the
 * License at 
 * 
 * http://www.apache.org/licenses/LICENSE-2.0 
 * 
 * Unless required by applicable law or agreed
 * to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
 * language governing permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.client.coprocessor;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentSkipListMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.ColumnInterpreter;
import org.apache.hadoop.hbase.ipc.BlockingRpcCallback;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.TimeseriesAggregateProtos.TimeseriesRange;
import org.apache.hadoop.hbase.protobuf.generated.TimeseriesAggregateProtos.TimeseriesAggregateRequest;
import org.apache.hadoop.hbase.protobuf.generated.TimeseriesAggregateProtos.TimeseriesAggregateResponse;
import org.apache.hadoop.hbase.protobuf.generated.TimeseriesAggregateProtos.TimeseriesAggregateResponseMapEntry;
import org.apache.hadoop.hbase.protobuf.generated.TimeseriesAggregateProtos.TimeseriesAggregateService;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;

import com.google.protobuf.ByteString;
import com.google.protobuf.Message;

/**
 * This client class is for invoking the aggregate functions deployed on the Region Server side via
 * the TimeseriesAggregateService. This class will implement the supporting functionality for
 * summing/processing the individual results obtained from the TimeseriesAggregateService for each
 * region.
 * <p>
 * This will serve as the client side handler for invoking the aggregate functions.
 * <ul>
 * For all aggregate functions,
 * <li>start row < end row is an essential condition (if they are not
 * {@link HConstants#EMPTY_BYTE_ARRAY})
 * <li>Column family can't be null. In case where multiple families are provided, an IOException
 * will be thrown. An optional column qualifier can also be defined.
 * <li>For methods to find maximum, minimum, sum, rowcount, it returns the parameter type. For
 * average and std, it returns a double value. For row count, it returns a long value. <T> Cell
 * value data type <S> Promoted data type
 * <P>
 * PB message that is used to transport initializer specific bytes
 * <Q>PB message that is used to transport Cell (<T>) instance <R> PB message that is used to
 * transport Promoted (<S>) instance
 */
@InterfaceAudience.Private
public class TimeseriesAggregationClient {

    private static final Log log = LogFactory.getLog(TimeseriesAggregationClient.class);
    private Configuration conf;
    private int intervalSeconds;
    private Integer timestampSecondsMin, timestampSecondsMax;
    private String keyFilterPattern;

    /**
     * Constructor with Conf object
     * @param cfg
     */
    public TimeseriesAggregationClient(Configuration cfg, int intervalSeconds) {
        this.conf = cfg;
        this.intervalSeconds = intervalSeconds;
    }

    /**
     * Constructor with Conf object
     * @param cfg
     */
    public TimeseriesAggregationClient(Configuration cfg, int intervalSeconds, int timestampSecondsMin,
            int timestampSecondsMax, String keyFilterPattern) {
        this.conf = cfg;
        this.intervalSeconds = intervalSeconds;
        this.timestampSecondsMin = timestampSecondsMin;
        this.timestampSecondsMax = timestampSecondsMax;
        this.keyFilterPattern = keyFilterPattern;
    }

    /**
     * It gives the maximum value of a column for a given column family for the given range. In case
     * qualifier is null, a max of all values for the given family is returned.
     * @param tableName
     * @param ci
     * @param scan
     * @return max val ConcurrentSkipListMap<Long, R> (Will come as proto from region needs to be
     *         passed out as ConcurrentSkipListMap)
     * @throws Throwable The caller is supposed to handle the exception as they are thrown &
     *           propagated to it.
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, R> max(
            final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan)
            throws Throwable {
        HTable table = null;
        try {
            table = new HTable(conf, tableName);
            return max(table, ci, scan);
        } finally {
            if (table != null) {
                table.close();
            }
        }
    }

    /**
     * It gives the maximum value of a column for a given column family for the given range. In case
     * qualifier is null, a max of all values for the given family is returned.
     * @param table
     * @param ci
     * @param scan
     * @return max val ConcurrentSkipListMap<Long, R> (Will come as proto from region needs to be
     *         passed out as ConcurrentSkipListMap)
     * @throws Throwable The caller is supposed to handle the exception as they are thrown &
     *           propagated to it.
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, R> max(
            final HTable table, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable {
        final TimeseriesAggregateRequest requestArg = validateArgAndGetPB(scan, ci, false, intervalSeconds,
                timestampSecondsMin, timestampSecondsMax, keyFilterPattern);
        class MaxCallBack implements Batch.Callback<TimeseriesAggregateResponse> {
            ConcurrentSkipListMap<Long, R> max = new ConcurrentSkipListMap<Long, R>();

            ConcurrentSkipListMap<Long, R> getMax() {
                return max;
            }

            @Override
            public synchronized void update(byte[] region, byte[] row, TimeseriesAggregateResponse result) {
                List<TimeseriesAggregateResponseMapEntry> results = ((TimeseriesAggregateResponse) result)
                        .getEntryList();
                for (TimeseriesAggregateResponseMapEntry entry : results) {
                    R candidate;
                    if (entry.getValue().getFirstPartCount() > 0) {
                        ByteString b = entry.getValue().getFirstPart(0);
                        Q q = null;
                        try {
                            q = ProtobufUtil.getParsedGenericInstance(ci.getClass(), 3, b);
                        } catch (IOException e) {
                            // TODO Auto-generated catch block
                            e.printStackTrace();
                        }
                        candidate = ci.getCellValueFromProto(q);
                        if (null != q) {
                            if (max.containsKey(entry.getKey())) {
                                R current = max.get(entry.getKey());
                                max.put(entry.getKey(),
                                        (current == null
                                                || (candidate != null && ci.compare(current, candidate) < 0))
                                                        ? candidate
                                                        : current);
                            } else {
                                max.put(entry.getKey(), ci.getCellValueFromProto(q));
                            }
                        }
                    }
                }
            }
        }

        MaxCallBack aMaxCallBack = new MaxCallBack();
        table.coprocessorService(TimeseriesAggregateService.class, scan.getStartRow(), scan.getStopRow(),
                new Batch.Call<TimeseriesAggregateService, TimeseriesAggregateResponse>() {
                    @Override
                    public TimeseriesAggregateResponse call(TimeseriesAggregateService instance)
                            throws IOException {
                        ServerRpcController controller = new ServerRpcController();
                        BlockingRpcCallback<TimeseriesAggregateResponse> rpcCallback = new BlockingRpcCallback<TimeseriesAggregateResponse>();
                        instance.getMax(controller, requestArg, rpcCallback);
                        TimeseriesAggregateResponse response = rpcCallback.get();
                        if (controller.failedOnException()) {
                            throw controller.getFailedOn();
                        }
                        return response;
                    }
                }, aMaxCallBack);
        return aMaxCallBack.getMax();
    }

    /**
     * It gives the minimum value of a column for a given column family for the given range. In case
     * qualifier is null, a min of all values for the given family is returned.
     * @param tableName
     * @param ci
     * @param scan
     * @return min val ConcurrentSkipListMap<Long, R> (Will come as proto from region needs to be
     *         passed out as ConcurrentSkipListMap)
     * @throws Throwable The caller is supposed to handle the exception as they are thrown &
     *           propagated to it.
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, R> min(
            final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan)
            throws Throwable {
        HTable table = null;
        try {
            table = new HTable(conf, tableName);
            return min(table, ci, scan);
        } finally {
            if (table != null) {
                table.close();
            }
        }
    }

    /**
     * It gives the minimum value of a column for a given column family for the given range. In case
     * qualifier is null, a min of all values for the given family is returned.
     * @param table
     * @param ci
     * @param scan
     * @return min val ConcurrentSkipListMap<Long, R> (Will come as proto from region needs to be
     *         passed out as ConcurrentSkipListMap)
     * @throws Throwable The caller is supposed to handle the exception as they are thrown &
     *           propagated to it.
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, R> min(
            final HTable table, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable {
        final TimeseriesAggregateRequest requestArg = validateArgAndGetPB(scan, ci, false, intervalSeconds,
                timestampSecondsMin, timestampSecondsMax, keyFilterPattern);
        class MinCallBack implements Batch.Callback<TimeseriesAggregateResponse> {
            ConcurrentSkipListMap<Long, R> min = new ConcurrentSkipListMap<Long, R>();

            ConcurrentSkipListMap<Long, R> getMin() {
                return min;
            }

            @Override
            public synchronized void update(byte[] region, byte[] row, TimeseriesAggregateResponse result) {
                List<TimeseriesAggregateResponseMapEntry> results = ((TimeseriesAggregateResponse) result)
                        .getEntryList();
                for (TimeseriesAggregateResponseMapEntry entry : results) {
                    R candidate;
                    if (entry.getValue().getFirstPartCount() > 0) {
                        ByteString b = entry.getValue().getFirstPart(0);
                        Q q = null;
                        try {
                            q = ProtobufUtil.getParsedGenericInstance(ci.getClass(), 3, b);
                        } catch (IOException e) {
                            // TODO Auto-generated catch block
                            e.printStackTrace();
                        }
                        candidate = ci.getCellValueFromProto(q);
                        if (null != q) {
                            if (min.containsKey(entry.getKey())) {
                                R current = min.get(entry.getKey());
                                min.put(entry.getKey(),
                                        (current == null
                                                || (candidate != null && ci.compare(current, candidate) < 0))
                                                        ? current
                                                        : candidate);
                            } else {
                                min.put(entry.getKey(), ci.getCellValueFromProto(q));
                            }
                        }
                    }
                }
            }
        }

        MinCallBack aMinCallBack = new MinCallBack();
        table.coprocessorService(TimeseriesAggregateService.class, scan.getStartRow(), scan.getStopRow(),
                new Batch.Call<TimeseriesAggregateService, TimeseriesAggregateResponse>() {
                    @Override
                    public TimeseriesAggregateResponse call(TimeseriesAggregateService instance)
                            throws IOException {
                        ServerRpcController controller = new ServerRpcController();
                        BlockingRpcCallback<TimeseriesAggregateResponse> rpcCallback = new BlockingRpcCallback<TimeseriesAggregateResponse>();
                        instance.getMin(controller, requestArg, rpcCallback);
                        TimeseriesAggregateResponse response = rpcCallback.get();
                        if (controller.failedOnException()) {
                            throw controller.getFailedOn();
                        }
                        return response;
                    }
                }, aMinCallBack);
        return aMinCallBack.getMin();
    }

    /**
     * It gives the sum value of a column for a given column family for the given range. In case
     * qualifier is null, a sum of all values for the given family is returned.
     * @param tableName
     * @param ci
     * @param scan
     * @return sum val ConcurrentSkipListMap<Long, R> (Will come as proto from region needs to be
     *         passed out as ConcurrentSkipListMap)
     * @throws Throwable The caller is supposed to handle the exception as they are thrown &
     *           propagated to it.
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, S> sum(
            final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan)
            throws Throwable {
        HTable table = null;
        try {
            table = new HTable(conf, tableName);
            return sum(table, ci, scan);
        } finally {
            if (table != null) {
                table.close();
            }
        }
    }

    /**
     * It gives the sum value of a column for a given column family for the given range. In case
     * qualifier is null, a sum of all values for the given family is returned.
     * @param table
     * @param ci
     * @param scan
     * @return sum val ConcurrentSkipListMap<Long, R> (Will come as proto from region needs to be
     *         passed out as ConcurrentSkipListMap)
     * @throws Throwable The caller is supposed to handle the exception as they are thrown &
     *           propagated to it.
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, S> sum(
            final HTable table, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable {
        final TimeseriesAggregateRequest requestArg = validateArgAndGetPB(scan, ci, false, intervalSeconds,
                timestampSecondsMin, timestampSecondsMax, keyFilterPattern);
        class SumCallBack implements Batch.Callback<TimeseriesAggregateResponse> {
            ConcurrentSkipListMap<Long, S> sum = new ConcurrentSkipListMap<Long, S>();

            ConcurrentSkipListMap<Long, S> getMax() {
                return sum;
            }

            @Override
            public synchronized void update(byte[] region, byte[] row, TimeseriesAggregateResponse result) {
                List<TimeseriesAggregateResponseMapEntry> results = ((TimeseriesAggregateResponse) result)
                        .getEntryList();
                for (TimeseriesAggregateResponseMapEntry entry : results) {
                    S candidate;
                    if (entry.getValue().getFirstPartCount() == 0) {
                        if (!sum.containsKey(entry.getKey())) {
                            sum.put(entry.getKey(), null);
                        }
                    } else {
                        ByteString b = entry.getValue().getFirstPart(0);
                        T t = null;
                        try {
                            t = ProtobufUtil.getParsedGenericInstance(ci.getClass(), 4, b);
                        } catch (IOException e) {
                            // TODO Auto-generated catch block
                            e.printStackTrace();
                        }
                        candidate = ci.getPromotedValueFromProto(t);
                        if (null != t) {
                            if (sum.containsKey(entry.getKey())) {
                                S current = sum.get(entry.getKey());
                                sum.put(entry.getKey(), (ci.add(current, candidate)));
                            } else {
                                if (entry.getValue().getFirstPartCount() == 0) {
                                    sum.put(entry.getKey(), null);
                                } else {
                                    sum.put(entry.getKey(), candidate);
                                }
                            }
                        }
                    }
                }
            }
        }

        SumCallBack aSumCallBack = new SumCallBack();
        table.coprocessorService(TimeseriesAggregateService.class, scan.getStartRow(), scan.getStopRow(),
                new Batch.Call<TimeseriesAggregateService, TimeseriesAggregateResponse>() {
                    @Override
                    public TimeseriesAggregateResponse call(TimeseriesAggregateService instance)
                            throws IOException {
                        ServerRpcController controller = new ServerRpcController();
                        BlockingRpcCallback<TimeseriesAggregateResponse> rpcCallback = new BlockingRpcCallback<TimeseriesAggregateResponse>();
                        instance.getSum(controller, requestArg, rpcCallback);
                        TimeseriesAggregateResponse response = rpcCallback.get();
                        if (controller.failedOnException()) {
                            throw controller.getFailedOn();
                        }
                        // if (response.getEntryCount() > 0) {
                        return response;
                        // }
                        // return null;
                    }
                }, aSumCallBack);
        return aSumCallBack.getMax();
    }

    /**
     * It computes average while fetching sum and row count from all the corresponding regions.
     * Approach is to compute a global sum of region level sum and rowcount and then compute the
     * average.
     * @param table
     * @param scan
     * @throws Throwable
     */
    private <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, Pair<S, Long>> getAvgArgs(
            final HTable table, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable {
        final TimeseriesAggregateRequest requestArg = validateArgAndGetPB(scan, ci, false, intervalSeconds,
                timestampSecondsMin, timestampSecondsMax, keyFilterPattern);
        class AvgCallBack implements Batch.Callback<TimeseriesAggregateResponse> {
            ConcurrentSkipListMap<Long, Pair<S, Long>> averages = new ConcurrentSkipListMap<Long, Pair<S, Long>>();

            public synchronized ConcurrentSkipListMap<Long, Pair<S, Long>> getAvgArgs() {
                return averages;
            }

            @Override
            public synchronized void update(byte[] region, byte[] row, TimeseriesAggregateResponse result) {
                List<TimeseriesAggregateResponseMapEntry> results = result.getEntryList();
                for (TimeseriesAggregateResponseMapEntry entry : results) {

                    if (entry.getValue().getFirstPartCount() == 0) {
                        if (!averages.containsKey(entry.getKey())) {
                            averages.put(entry.getKey(), new Pair<S, Long>(null, 0L));
                        }
                    } else {

                        ByteString b = entry.getValue().getFirstPart(0);
                        T t = null;
                        try {
                            t = ProtobufUtil.getParsedGenericInstance(ci.getClass(), 4, b);
                        } catch (IOException e) {
                            // TODO Auto-generated catch block
                            e.printStackTrace();
                        }
                        S s = ci.getPromotedValueFromProto(t);

                        ByteBuffer bb = ByteBuffer.allocate(8)
                                .put(getBytesFromResponse(entry.getValue().getSecondPart()));
                        bb.rewind();

                        if (averages.containsKey(entry.getKey())) {
                            S sum = averages.get(entry.getKey()).getFirst();
                            Long rowCount = averages.get(entry.getKey()).getSecond();
                            averages.put(entry.getKey(),
                                    new Pair<S, Long>(ci.add(sum, s), rowCount + bb.getLong()));
                        } else {
                            averages.put(entry.getKey(), new Pair<S, Long>(s, bb.getLong()));
                        }
                    }
                }
            }
        }
        AvgCallBack avgCallBack = new AvgCallBack();
        table.coprocessorService(TimeseriesAggregateService.class, scan.getStartRow(), scan.getStopRow(),
                new Batch.Call<TimeseriesAggregateService, TimeseriesAggregateResponse>() {
                    @Override
                    public TimeseriesAggregateResponse call(TimeseriesAggregateService instance)
                            throws IOException {
                        ServerRpcController controller = new ServerRpcController();
                        BlockingRpcCallback<TimeseriesAggregateResponse> rpcCallback = new BlockingRpcCallback<TimeseriesAggregateResponse>();
                        instance.getAvg(controller, requestArg, rpcCallback);
                        TimeseriesAggregateResponse response = rpcCallback.get();
                        if (controller.failedOnException()) {
                            throw controller.getFailedOn();
                        }
                        // if (response.getEntryCount() > 0) {
                        return response;
                        // }
                        // return null;
                    }
                }, avgCallBack);
        return avgCallBack.getAvgArgs();
    }

    /**
     * This is the client side interface/handle for calling the average method for a given cf-cq
     * combination. It was necessary to add one more call stack as its return type should be a decimal
     * value, irrespective of what columninterpreter says. So, this methods collects the necessary
     * parameters to compute the average and returs the double value.
     * @param tableName
     * @param ci
     * @param scan
     * @return <R, S>
     * @throws Throwable
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, Double> avg(
            final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable {
        HTable table = null;
        try {
            table = new HTable(conf, tableName);
            return avg(table, ci, scan);
        } finally {
            if (table != null) {
                table.close();
            }
        }
    }

    /**
     * This is the client side interface/handle for calling the average method for a given cf-cq
     * combination. It was necessary to add one more call stack as its return type should be a decimal
     * value, irrespective of what columninterpreter says. So, this methods collects the necessary
     * parameters to compute the average and returs the double value.
     * @param table
     * @param ci
     * @param scan
     * @return <R, S>
     * @throws Throwable
     */
    public <R, S, P extends Message, Q extends Message, T extends Message> ConcurrentSkipListMap<Long, Double> avg(
            final HTable table, final ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable {
        ConcurrentSkipListMap<Long, Pair<S, Long>> p = getAvgArgs(table, ci, scan);
        ConcurrentSkipListMap<Long, Double> avg = new ConcurrentSkipListMap<Long, Double>();
        for (Map.Entry<Long, Pair<S, Long>> entry : p.entrySet()) {
            avg.put(entry.getKey(), ci.divideForAvg(entry.getValue().getFirst(), entry.getValue().getSecond()));
        }
        return avg;
    }

    byte[] getBytesFromResponse(ByteString response) {
        ByteBuffer bb = response.asReadOnlyByteBuffer();
        bb.rewind();
        byte[] bytes;
        if (bb.hasArray()) {
            bytes = bb.array();
        } else {
            bytes = response.toByteArray();
        }
        return bytes;
    }

    /*
     * @param scan
     * @param canFamilyBeAbsent whether column family can be absent in familyMap of scan
     */
    private void validateParameters(Scan scan, boolean canFamilyBeAbsent) throws IOException {
        if (scan == null
                || (Bytes.equals(scan.getStartRow(), scan.getStopRow())
                        && !Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW))
                || ((Bytes.compareTo(scan.getStartRow(), scan.getStopRow()) > 0)
                        && !Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW))) {
            throw new IOException("Agg client Exception: Startrow should be smaller than Stoprow");
        } else if (!canFamilyBeAbsent) {
            if (scan.getFamilyMap().size() != 1) {
                throw new IOException("There must be only one family.");
            }
        }
    }

    <R, S, P extends Message, Q extends Message, T extends Message> TimeseriesAggregateRequest validateArgAndGetPB(
            Scan scan, ColumnInterpreter<R, S, P, Q, T> ci, boolean canFamilyBeAbsent, int intervalSeconds,
            Integer timestampSecondsMin, Integer timestampSecondsMax, String keyFilterPattern) throws IOException {
        validateParameters(scan, canFamilyBeAbsent);
        final TimeseriesAggregateRequest.Builder requestBuilder = TimeseriesAggregateRequest.newBuilder();
        requestBuilder.setInterpreterClassName(ci.getClass().getCanonicalName());
        P columnInterpreterSpecificData = null;
        if ((columnInterpreterSpecificData = ci.getRequestData()) != null) {
            requestBuilder.setInterpreterSpecificBytes(columnInterpreterSpecificData.toByteString());
        }
        requestBuilder.setScan(ProtobufUtil.toScan(scan));

        requestBuilder.setTimeIntervalSeconds(intervalSeconds);
        if (!(null == timestampSecondsMin)) {
            final TimeseriesRange.Builder rangeBuilder = TimeseriesRange.newBuilder();
            rangeBuilder.setKeyTimestampMin(timestampSecondsMin);
            rangeBuilder.setKeyTimestampMax(timestampSecondsMax);
            rangeBuilder.setKeyTimestampFilterPattern(keyFilterPattern);
            requestBuilder.setRange(rangeBuilder.build());
        }
        return requestBuilder.build();
    }
}