co.cask.cdap.metrics.data.TimeSeriesTable.java Source code

Introduction

Here is the source code for co.cask.cdap.metrics.data.TimeSeriesTable.java, the CDAP table abstraction for storing time series metrics.

Source

/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package co.cask.cdap.metrics.data;

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Scanner;
import co.cask.cdap.common.utils.ImmutablePair;
import co.cask.cdap.data2.OperationException;
import co.cask.cdap.data2.StatusCode;
import co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter;
import co.cask.cdap.data2.dataset2.lib.table.MetricsTable;
import co.cask.cdap.metrics.MetricsConstants;
import co.cask.cdap.metrics.transport.MetricsRecord;
import co.cask.cdap.metrics.transport.TagMetric;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NavigableMap;

/**
 * Table for storing time series metrics.
 * <p>
 * Row key:
 * {@code context|metricName|tags|timebase|runId}
 * </p>
 * <p>
 * Column: offset from the timebase of the row.
 * </p>
 * <p>
 * Cell: Value for the metric specified by the row, at the timestamp timebase + offset * resolution.
 * </p>
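 * <p>
 * For example, with a resolution of 1 second and a rollTime of 3600, each row covers 3600
 * seconds: a value emitted at timestamp 7205 is stored in the row with timebase 7200
 * (7205 / 3600 * 3600), under the two-byte column 5, and read back as 7200 + 5 * 1.
 * </p>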
 * <p>
 * TODO: More doc.
 * </p>
 */
public final class TimeSeriesTable {

    private static final int MAX_ROLL_TIME = 0xfffe;
    private static final byte[] FOUR_ZERO_BYTES = { 0, 0, 0, 0 };
    private static final byte[] FOUR_ONE_BYTES = { 1, 1, 1, 1 };

    private final MetricsTable timeSeriesTable;
    private final MetricsEntityCodec entityCodec;
    private final int resolution;
    private final int rollTimebaseInterval;
    private final ImmutablePair<byte[], byte[]> defaultTagFuzzyPair;

    // Cache for delta values.
    private final byte[][] deltaCache;

    /**
     * Creates a TimeSeriesTable.
     *
     * @param timeSeriesTable The underlying {@link MetricsTable} for storing metric information.
     * @param entityCodec The {@link MetricsEntityCodec} for encoding entities.
     * @param resolution Resolution of the table, in seconds.
     * @param rollTime Number of resolution intervals to write into a row before rolling over to a
     *                 new row with a new timebase; that is, the difference between the timebases of
     *                 two consecutive rows divided by the resolution in seconds. It essentially
     *                 defines how many columns each row in the table has. This value should be < 65535.
     */
    TimeSeriesTable(MetricsTable timeSeriesTable, MetricsEntityCodec entityCodec, int resolution, int rollTime) {

        this.timeSeriesTable = timeSeriesTable;
        this.entityCodec = entityCodec;
        this.resolution = resolution;

        // Two bytes for column name, which is a delta timestamp
        Preconditions.checkArgument(rollTime <= MAX_ROLL_TIME, "Rolltime should be <= " + MAX_ROLL_TIME);
        this.rollTimebaseInterval = rollTime * resolution;
        this.deltaCache = createDeltaCache(rollTime);

        this.defaultTagFuzzyPair = createDefaultTagFuzzyPair();
    }

    /**
     * Saves a collection of {@link co.cask.cdap.metrics.transport.MetricsRecord}.
     */
    public void save(Iterable<MetricsRecord> records) throws OperationException {
        save(records.iterator());
    }

    public void save(Iterator<MetricsRecord> records) throws OperationException {
        if (!records.hasNext()) {
            return;
        }

        // Simply collecting all rows/cols/values that need to be put to the underlying table.
        NavigableMap<byte[], NavigableMap<byte[], byte[]>> table = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);

        while (records.hasNext()) {
            getUpdates(records.next(), table);
        }

        try {
            timeSeriesTable.put(table);
        } catch (Exception e) {
            throw new OperationException(StatusCode.INTERNAL_ERROR, e.getMessage(), e);
        }
    }

    public MetricsScanner scan(MetricsScanQuery query) throws OperationException {
        return scanFor(query, false);
    }

    public MetricsScanner scanAllTags(MetricsScanQuery query) throws OperationException {
        return scanFor(query, true);
    }

    /**
     * Deletes all the row keys which match the context prefix.
     *
     * @param contextPrefix Prefix of the context to match.  Must not be null, as full table deletes should be done
     *                      through the clear method.
     * @throws OperationException if there is an error in deleting entries.
     */
    public void delete(String contextPrefix) throws OperationException {
        Preconditions.checkArgument(contextPrefix != null, "null context not allowed for delete");
        try {
            timeSeriesTable.deleteAll(entityCodec.encodeWithoutPadding(MetricsEntityType.CONTEXT, contextPrefix));
        } catch (Exception e) {
            throw new OperationException(StatusCode.INTERNAL_ERROR, e.getMessage(), e);
        }
    }

    /**
     * Deletes all the row keys which match the context prefix and metric prefix.  Context and Metric cannot both be
     * null, as full table deletes should be done through the clear method.
     *
     * @param contextPrefix Prefix of the context to match, null means any context.
     * @param metricPrefix Prefix of the metric to match, null means any metric.
     * @throws OperationException if there is an error in deleting entries.
     */
    public void delete(String contextPrefix, String metricPrefix) throws OperationException {
        Preconditions.checkArgument(contextPrefix != null || metricPrefix != null,
                "context and metric cannot both be null");
        if (metricPrefix == null) {
            delete(contextPrefix);
        } else {
            byte[] startRow = getPaddedKey(contextPrefix, "0", metricPrefix, null, 0, 0);
            byte[] endRow = getPaddedKey(contextPrefix, "0", metricPrefix, null, Integer.MAX_VALUE, 0xff);
            try {
                // Create fuzzy row filter
                ImmutablePair<byte[], byte[]> contextPair = entityCodec.paddedFuzzyEncode(MetricsEntityType.CONTEXT,
                        contextPrefix, 0);
                ImmutablePair<byte[], byte[]> metricPair = entityCodec.paddedFuzzyEncode(MetricsEntityType.METRIC,
                        metricPrefix, 0);
                ImmutablePair<byte[], byte[]> tagPair = entityCodec.paddedFuzzyEncode(MetricsEntityType.TAG, null,
                        0);
                ImmutablePair<byte[], byte[]> runIdPair = entityCodec.paddedFuzzyEncode(MetricsEntityType.RUN, null,
                        0);
                FuzzyRowFilter filter = new FuzzyRowFilter(ImmutableList.of(ImmutablePair.of(
                        Bytes.concat(contextPair.getFirst(), metricPair.getFirst(), tagPair.getFirst(),
                                Bytes.toBytes(0), runIdPair.getFirst()),
                        Bytes.concat(contextPair.getSecond(), metricPair.getSecond(), tagPair.getSecond(),
                                FOUR_ONE_BYTES, runIdPair.getSecond()))));

                timeSeriesTable.deleteRange(startRow, endRow, null, filter);
            } catch (Exception e) {
                throw new OperationException(StatusCode.INTERNAL_ERROR, e.getMessage(), e);
            }
        }
    }
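
    // Note on the fuzzy pair above: the mask semantics follow HBase's FuzzyRowFilter, which
    // CDAP's FuzzyRowFilter mirrors. A 0 mask byte means the key byte at that position must
    // match exactly, while a 1 mask byte is a wildcard; pairing the timebase position with
    // FOUR_ONE_BYTES therefore matches rows with any timebase, whereas getFilter below pins
    // each candidate timebase with FOUR_ZERO_BYTES.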

    /**
     * Deletes all entries that would match the given scan query.
     *
     * @param query Query specifying the context, metric, runId, tag, and time range of entries to
     *              delete. A null value for context, metric, or runId matches any value for that
     *              field. A null value for tag matches untagged entries, which is the same as using
     *              MetricsConstants.EMPTY_TAG.
     * @throws OperationException if there is an error in deleting entries.
     */
    public void delete(MetricsScanQuery query) throws OperationException {
        try {
            ScannerFields fields = getScannerFields(query);
            timeSeriesTable.deleteRange(fields.startRow, fields.endRow, fields.columns, fields.filter);
        } catch (Exception e) {
            throw new OperationException(StatusCode.INTERNAL_ERROR, e.getMessage(), e);
        }
    }

    /**
     * Deletes all rows whose timebase is earlier than the timebase of the given time.
     * @param beforeTime All data before this timestamp will be removed (exclusive).
     * @throws OperationException if there is an error in deleting entries.
     */
    public void deleteBefore(long beforeTime) throws OperationException {
        // The end timebase is the last timebase that is not greater than beforeTime.
        int endTimeBase = getTimeBase(beforeTime);

        Scanner scanner = null;
        try {
            scanner = timeSeriesTable.scan(null, null, null, null);

            // Loop through the scanner entries and collect rows to be deleted
            List<byte[]> rows = Lists.newArrayList();
            Row nextEntry;
            while ((nextEntry = scanner.next()) != null) {
                byte[] rowKey = nextEntry.getRow();

                // Decode timestamp
                int offset = entityCodec.getEncodedSize(MetricsEntityType.CONTEXT)
                        + entityCodec.getEncodedSize(MetricsEntityType.METRIC)
                        + entityCodec.getEncodedSize(MetricsEntityType.TAG);
                int timeBase = Bytes.toInt(rowKey, offset, 4);
                if (timeBase < endTimeBase) {
                    rows.add(rowKey);
                }
            }
            // If any rows were collected, delete them
            if (!rows.isEmpty()) {
                timeSeriesTable.delete(rows);
            }
        } catch (Exception e) {
            throw new OperationException(StatusCode.INTERNAL_ERROR, e.getMessage(), e);
        } finally {
            if (scanner != null) {
                scanner.close();
            }
        }
    }

    /**
     * Clears the storage table.
     * @throws OperationException if there is an error in clearing data.
     */
    public void clear() throws OperationException {
        try {
            timeSeriesTable.deleteAll(new byte[] {});
        } catch (Exception e) {
            throw new OperationException(StatusCode.INTERNAL_ERROR, e.getMessage(), e);
        }
    }

    private MetricsScanner scanFor(MetricsScanQuery query, boolean shouldMatchAllTags) throws OperationException {
        try {
            ScannerFields fields = getScannerFields(query, shouldMatchAllTags);
            Scanner scanner = timeSeriesTable.scan(fields.startRow, fields.endRow, fields.columns, fields.filter);
            return new MetricsScanner(query, scanner, entityCodec, resolution);
        } catch (Exception e) {
            throw new OperationException(StatusCode.INTERNAL_ERROR, e.getMessage(), e);
        }
    }

    /**
     * Sets up all rows, columns and values for updating the metrics table.
     */
    private void getUpdates(MetricsRecord record, NavigableMap<byte[], NavigableMap<byte[], byte[]>> table) {
        long timestamp = record.getTimestamp() / resolution * resolution;
        int timeBase = getTimeBase(timestamp);

        // Key for the record without any tag
        byte[] rowKey = getKey(record.getContext(), record.getRunId(), record.getName(), null, timeBase);

        // Delta from the timebase in resolution units; guaranteed to fit in 2 bytes.
        // Dividing by resolution matches the column computation in getScannerFields.
        byte[] column = deltaCache[(int) ((timestamp - timeBase) / resolution)];

        addValue(rowKey, column, table, record.getValue());

        // Save tags metrics
        for (TagMetric tag : record.getTags()) {
            rowKey = getKey(record.getContext(), record.getRunId(), record.getName(), tag.getTag(), timeBase);
            addValue(rowKey, column, table, tag.getValue());
        }
    }

    /**
     * Merges a value into the pending update map, summing it with any value already recorded
     * for the same row and column.
     */
    private void addValue(byte[] rowKey, byte[] column, NavigableMap<byte[], NavigableMap<byte[], byte[]>> table,
            int value) {
        byte[] oldValue = get(table, rowKey, column);
        int newValue = value;
        if (oldValue != null) {
            newValue = Bytes.toInt(oldValue) + value;
        }
        put(table, rowKey, column, Bytes.toBytes(newValue));
    }

    private static byte[] get(NavigableMap<byte[], NavigableMap<byte[], byte[]>> table, byte[] row, byte[] column) {
        NavigableMap<byte[], byte[]> rowMap = table.get(row);
        return rowMap == null ? null : rowMap.get(column);
    }

    private static void put(NavigableMap<byte[], NavigableMap<byte[], byte[]>> table, byte[] row, byte[] column,
            byte[] value) {
        NavigableMap<byte[], byte[]> rowMap = table.get(row);
        if (rowMap == null) {
            rowMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
            table.put(row, rowMap);
        }

        rowMap.put(column, value);
    }
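
    // The two helpers above treat the pending updates as a two-level sorted map of
    // row -> (column -> value). TreeMaps with Bytes.BYTES_COMPARATOR are needed because
    // byte arrays compare by identity rather than by content in hash-based maps.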

    /**
     * Creates the row key for the given context, metric, tag, and timebase.
     */
    private byte[] getKey(String context, String runId, String metric, String tag, int timeBase) {
        Preconditions.checkArgument(context != null, "Context cannot be null.");
        Preconditions.checkArgument(runId != null, "RunId cannot be null.");
        Preconditions.checkArgument(metric != null, "Metric cannot be null.");

        return Bytes.concat(entityCodec.encode(MetricsEntityType.CONTEXT, context),
                entityCodec.encode(MetricsEntityType.METRIC, metric),
                entityCodec.encode(MetricsEntityType.TAG, tag == null ? MetricsConstants.EMPTY_TAG : tag),
                Bytes.toBytes(timeBase), entityCodec.encode(MetricsEntityType.RUN, runId));
    }

    private byte[] getPaddedKey(String contextPrefix, String runId, String metricPrefix, String tagPrefix,
            int timeBase, int padding) {

        // If there is no contextPrefix, metricPrefix or runId, just apply the padding
        return Bytes.concat(entityCodec.paddedEncode(MetricsEntityType.CONTEXT, contextPrefix, padding),
                entityCodec.paddedEncode(MetricsEntityType.METRIC, metricPrefix, padding),
                entityCodec.paddedEncode(MetricsEntityType.TAG, tagPrefix, padding), Bytes.toBytes(timeBase),
                entityCodec.paddedEncode(MetricsEntityType.RUN, runId, padding));
    }

    private FuzzyRowFilter getFilter(MetricsScanQuery query, long startTimeBase, long endTimeBase,
            boolean shouldMatchAllTags) {
        String tag = query.getTagPrefix();

        // Create fuzzy row filter
        ImmutablePair<byte[], byte[]> contextPair = entityCodec.paddedFuzzyEncode(MetricsEntityType.CONTEXT,
                query.getContextPrefix(), 0);
        ImmutablePair<byte[], byte[]> metricPair = entityCodec.paddedFuzzyEncode(MetricsEntityType.METRIC,
                query.getMetricPrefix(), 0);
        ImmutablePair<byte[], byte[]> tagPair = (!shouldMatchAllTags && tag == null) ? defaultTagFuzzyPair
                : entityCodec.paddedFuzzyEncode(MetricsEntityType.TAG, tag, 0);
        ImmutablePair<byte[], byte[]> runIdPair = entityCodec.paddedFuzzyEncode(MetricsEntityType.RUN,
                query.getRunId(), 0);

        // For each timebase, construct a fuzzy filter pair
        List<ImmutablePair<byte[], byte[]>> fuzzyPairs = Lists.newLinkedList();
        for (long timeBase = startTimeBase; timeBase <= endTimeBase; timeBase += this.rollTimebaseInterval) {
            fuzzyPairs.add(ImmutablePair.of(
                    Bytes.concat(contextPair.getFirst(), metricPair.getFirst(), tagPair.getFirst(),
                            Bytes.toBytes((int) timeBase), runIdPair.getFirst()),
                    Bytes.concat(contextPair.getSecond(), metricPair.getSecond(), tagPair.getSecond(),
                            FOUR_ZERO_BYTES, runIdPair.getSecond())));
        }

        return new FuzzyRowFilter(fuzzyPairs);
    }

    /**
     * Returns the timebase computed with the table settings for the given timestamp.
     */
    private int getTimeBase(long time) {
        // We are using a 4-byte timebase in the row key
        long timeBase = time / rollTimebaseInterval * rollTimebaseInterval;
        Preconditions.checkArgument(timeBase < 0x100000000L, "Timestamp is too large.");
        return (int) timeBase;
    }
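
    // Since the timebase is stored as a 4-byte int in the row key, timestamps remain valid
    // only while the rounded value stays below 0x100000000 seconds, i.e. roughly until the
    // year 2106.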

    private byte[][] createDeltaCache(int rollTime) {
        byte[][] deltas = new byte[rollTime + 1][];

        for (int i = 0; i <= rollTime; i++) {
            deltas[i] = Bytes.toBytes((short) i);
        }
        return deltas;
    }

    private ImmutablePair<byte[], byte[]> createDefaultTagFuzzyPair() {
        byte[] key = entityCodec.encode(MetricsEntityType.TAG, MetricsConstants.EMPTY_TAG);
        byte[] mask = new byte[key.length];
        Arrays.fill(mask, (byte) 0);
        return new ImmutablePair<byte[], byte[]>(key, mask);
    }

    private ScannerFields getScannerFields(MetricsScanQuery query) {
        return getScannerFields(query, false);
    }

    private ScannerFields getScannerFields(MetricsScanQuery query, boolean shouldMatchAllTags) {
        int startTimeBase = getTimeBase(query.getStartTime());
        int endTimeBase = getTimeBase(query.getEndTime());

        byte[][] columns = null;
        if (startTimeBase == endTimeBase) {
            // If on the same timebase, we only need a subset of the columns
            int startCol = (int) (query.getStartTime() - startTimeBase) / resolution;
            int endCol = (int) (query.getEndTime() - endTimeBase) / resolution;
            columns = new byte[endCol - startCol + 1][];

            for (int i = 0; i < columns.length; i++) {
                columns[i] = Bytes.toBytes((short) (startCol + i));
            }
        }

        String tagPrefix = query.getTagPrefix();
        if (!shouldMatchAllTags && tagPrefix == null) {
            tagPrefix = MetricsConstants.EMPTY_TAG;
        }
        byte[] startRow = getPaddedKey(query.getContextPrefix(), query.getRunId(), query.getMetricPrefix(),
                tagPrefix, startTimeBase, 0);
        byte[] endRow = getPaddedKey(query.getContextPrefix(), query.getRunId(), query.getMetricPrefix(), tagPrefix,
                endTimeBase + 1, 0xff);
        FuzzyRowFilter filter = getFilter(query, startTimeBase, endTimeBase, shouldMatchAllTags);

        return new ScannerFields(startRow, endRow, columns, filter);
    }
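
    // When the query's start and end times fall within the same row (same timebase), the scan
    // is narrowed to the exact range of two-byte columns; otherwise columns stays null and
    // every column of the matching rows is scanned.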

    private static class ScannerFields {
        private final byte[] startRow;
        private final byte[] endRow;
        private final byte[][] columns;
        private final FuzzyRowFilter filter;

        ScannerFields(byte[] startRow, byte[] endRow, byte[][] columns, FuzzyRowFilter filter) {
            this.startRow = startRow;
            this.endRow = endRow;
            this.columns = columns;
            this.filter = filter;
        }
    }
}
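
Example

The heart of this class is the mapping from a raw timestamp to a (timebase, column offset)
pair implemented by getTimeBase and getUpdates above. The standalone sketch below is an
editorial illustration rather than part of CDAP; it reproduces that arithmetic assuming a
resolution of 1 second and a rollTime of 3600 columns per row.

public class TimeBaseDemo {

    // Mirrors the TimeSeriesTable settings: resolution in seconds, columns per row.
    private static final int RESOLUTION = 1;
    private static final int ROLL_TIME = 3600;
    private static final int ROLL_TIMEBASE_INTERVAL = ROLL_TIME * RESOLUTION;

    // Same computation as TimeSeriesTable.getTimeBase(long).
    static int getTimeBase(long time) {
        long timeBase = time / ROLL_TIMEBASE_INTERVAL * ROLL_TIMEBASE_INTERVAL;
        if (timeBase >= 0x100000000L) {
            throw new IllegalArgumentException("Timestamp is too large.");
        }
        return (int) timeBase;
    }

    public static void main(String[] args) {
        long timestamp = 7205;                                     // seconds since the epoch
        long aligned = timestamp / RESOLUTION * RESOLUTION;        // align to resolution, as in getUpdates
        int timeBase = getTimeBase(aligned);                       // row timebase: 7200
        short delta = (short) ((aligned - timeBase) / RESOLUTION); // two-byte column name: 5
        System.out.println("timebase=" + timeBase + ", column delta=" + delta);
    }
}

With these settings a row holds one hour of per-second values, so a query whose start and end
fall inside the same hour lets getScannerFields narrow the scan to an exact column range.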