org.apache.cassandra.db.clock.IncrementCounterContext.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.cassandra.db.clock.IncrementCounterContext.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.clock;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.cassandra.db.DBConstants;
import org.apache.cassandra.db.IClock.ClockRelationship;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.commons.lang.ArrayUtils;

/**
 * An implementation of a distributed increment-only counter context.
 *
 * The data structure is:
 *   1) timestamp,
 *   2) timestamp of the latest known delete, and
 *   3) a list of (node id, count) pairs.
 *
 * On update:
 *   1) update timestamp to max(timestamp, local time), and
 *   2) the node updating the value will increment its associated count.
 *
 * The aggregated count can then be determined by rolling up all the counts from each
 * (node id, count) pair.  NOTE: only a given node id may increment its associated
 * count and care must be taken that (node id, count) pairs are correctly made
 * consistent.
 */
public class IncrementCounterContext implements IContext {
    // size of one header timestamp (a long, as sized by DBConstants.longSize_)
    public static final int TIMESTAMP_LENGTH = DBConstants.longSize_;
    // header layout: [update timestamp][latest known delete timestamp]
    public static final int HEADER_LENGTH = TIMESTAMP_LENGTH * 2; //2xlong

    // length of a node id; taken from the raw bytes of the local address
    private static final int idLength;
    // wrapped id of the local node; used by merge() to recognise local counts
    private static final FBUtilities.ByteArrayWrapper idWrapper;
    private static final int countLength = DBConstants.longSize_;
    private static final int stepLength; // length: id + count

    // lazy-load singleton
    private static class LazyHolder {
        private static final IncrementCounterContext incrementCounterContext = new IncrementCounterContext();
    }

    static {
        byte[] id = FBUtilities.getLocalAddress().getAddress();
        idLength = id.length;
        idWrapper = new FBUtilities.ByteArrayWrapper(id);
        stepLength = idLength + countLength;
    }

    /**
     * @return the shared, lazily created instance.
     */
    public static IncrementCounterContext instance() {
        return LazyHolder.incrementCounterContext;
    }

    /**
     * Creates an initial counter context.
     *
     * The context holds only the header: the current wall-clock time as the
     * timestamp and 0 as the delete timestamp.
     *
     * @return an empty counter context.
     */
    public byte[] create() {
        byte[] context = new byte[HEADER_LENGTH];
        FBUtilities.copyIntoBytes(context, 0, System.currentTimeMillis());
        FBUtilities.copyIntoBytes(context, TIMESTAMP_LENGTH, 0L);
        return context;
    }

    /**
     * Creates a header-only context whose timestamp is Long.MIN_VALUE and
     * whose delete timestamp is 0.
     *
     * @return the minimal counter context.
     */
    public byte[] createMin() {
        byte[] rv = new byte[HEADER_LENGTH];
        FBUtilities.copyIntoBytes(rv, 0, Long.MIN_VALUE);
        FBUtilities.copyIntoBytes(rv, TIMESTAMP_LENGTH, 0L);
        return rv;
    }

    // write a tuple (node id, count) at the front
    protected static void writeElement(byte[] context, byte[] id, long count) {
        writeElementAtStepOffset(context, 0, id, count);
    }

    // write a tuple (node id, count) at step offset
    // (stepOffset counts whole tuples after the header, not bytes)
    protected static void writeElementAtStepOffset(byte[] context, int stepOffset, byte[] id, long count) {
        int offset = HEADER_LENGTH + (stepOffset * stepLength);
        System.arraycopy(id, 0, context, offset, idLength);
        FBUtilities.copyIntoBytes(context, offset + idLength, count);
    }

    /**
     * Adds delta to the count of the given node and moves that node's tuple
     * to the front of the context.
     *
     * The timestamp of the passed-in context is always overwritten in place
     * with the current time.  When the node id is already present the same
     * array is modified and returned; otherwise a new, wider array is
     * returned.
     *
     * NOTE(review): assumes node.getAddress() has the same length as the
     * local address used to size node ids -- confirm for mixed IPv4/IPv6.
     *
     * @param context
     *            counter context (modified in place).
     * @param node
     *            node whose count is being updated.
     * @param delta
     *            amount to add to the node's count.
     * @return the updated context.
     */
    public byte[] update(byte[] context, InetAddress node, long delta) {
        // update timestamp
        FBUtilities.copyIntoBytes(context, 0, System.currentTimeMillis());

        // calculate node id
        byte[] nodeId = node.getAddress();

        // look for this node id
        for (int offset = HEADER_LENGTH; offset < context.length; offset += stepLength) {
            if (FBUtilities.compareByteSubArrays(context, offset, nodeId, 0, idLength) != 0)
                continue;

            // node id found: increment count, shift to front
            long count = FBUtilities.byteArrayToLong(context, offset + idLength);

            // slide every tuple that precedes the match one step to the right
            // (this overwrites the matched tuple), then rewrite it at the front
            System.arraycopy(context, HEADER_LENGTH, context, HEADER_LENGTH + stepLength, offset - HEADER_LENGTH);
            writeElement(context, nodeId, count + delta);

            return context;
        }

        // node id not found: widen context
        byte[] previous = context;
        context = new byte[previous.length + stepLength];

        System.arraycopy(previous, 0, context, 0, HEADER_LENGTH);
        writeElement(context, nodeId, delta);
        System.arraycopy(previous, HEADER_LENGTH, context, HEADER_LENGTH + stepLength,
                previous.length - HEADER_LENGTH);

        return context;
    }

    // swap bytes of step length in context (left and right are byte offsets)
    protected static void swapElement(byte[] context, int left, int right) {
        if (left == right)
            return;

        byte temp;
        for (int i = 0; i < stepLength; i++) {
            temp = context[left + i];
            context[left + i] = context[right + i];
            context[right + i] = temp;
        }
    }

    // partition bytes of step length in context (for quicksort)
    // left, right and pivotIndex are tuple indexes; returns the pivot's final tuple index
    protected static int partitionElements(byte[] context, int left, int right, int pivotIndex) {
        int leftOffset = HEADER_LENGTH + (left * stepLength);
        int rightOffset = HEADER_LENGTH + (right * stepLength);
        int pivotOffset = HEADER_LENGTH + (pivotIndex * stepLength);

        // copy the pivot out before it is moved to the right edge
        byte[] pivotValue = ArrayUtils.subarray(context, pivotOffset, pivotOffset + stepLength);
        swapElement(context, pivotOffset, rightOffset);
        int storeOffset = leftOffset;
        for (int i = leftOffset; i < rightOffset; i += stepLength) {
            // whole tuples (id followed by count) are compared
            if (FBUtilities.compareByteSubArrays(context, i, pivotValue, 0, stepLength) <= 0) {
                swapElement(context, i, storeOffset);
                storeOffset += stepLength;
            }
        }
        swapElement(context, storeOffset, rightOffset);
        return (storeOffset - HEADER_LENGTH) / stepLength;
    }

    // quicksort helper (left and right are inclusive tuple indexes)
    protected static void sortElementsByIdHelper(byte[] context, int left, int right) {
        if (right <= left)
            return;

        // overflow-safe midpoint
        int pivotIndex = left + (right - left) / 2;
        int pivotIndexNew = partitionElements(context, left, right, pivotIndex);
        sortElementsByIdHelper(context, left, pivotIndexNew - 1);
        sortElementsByIdHelper(context, pivotIndexNew + 1, right);
    }

    // quicksort context by id; sorts the passed array in place and returns it
    protected static byte[] sortElementsById(byte[] context) {
        assert 0 == ((context.length - HEADER_LENGTH) % stepLength) : "context size is not correct.";
        sortElementsByIdHelper(context, 0, ((context.length - HEADER_LENGTH) / stepLength) - 1);
        return context;
    }

    /**
     * Determine the last modified relationship between two contexts.
     *
     * Strategy:
     *  compare highest timestamp between contexts.
     *
     * @param left
     *            counter context.
     * @param right
     *            counter context.
     * @return the ClockRelationship between the contexts.
     */
    public ClockRelationship compare(byte[] left, byte[] right) {
        long maxDeleteTimestamp = Math.max(FBUtilities.byteArrayToLong(left, TIMESTAMP_LENGTH),
                FBUtilities.byteArrayToLong(right, TIMESTAMP_LENGTH));

        long leftTimestamp = FBUtilities.byteArrayToLong(left, 0);
        long rightTimestamp = FBUtilities.byteArrayToLong(right, 0);

        // obsolete context due to being older than the last known delete
        // (left is checked first, so it loses when both are obsolete)
        if (leftTimestamp < maxDeleteTimestamp) {
            return ClockRelationship.LESS_THAN;
        } else if (rightTimestamp < maxDeleteTimestamp) {
            return ClockRelationship.GREATER_THAN;
        }

        if (leftTimestamp < rightTimestamp) {
            return ClockRelationship.LESS_THAN;
        } else if (leftTimestamp == rightTimestamp) {
            return ClockRelationship.EQUAL;
        }
        return ClockRelationship.GREATER_THAN;
    }

    /**
     * Folds one observation into the relationship accumulated so far.
     *
     * @param current
     *            EQUAL, GREATER_THAN or LESS_THAN.
     * @param observed
     *            GREATER_THAN or LESS_THAN.
     * @return observed when it agrees with current (or current is still
     *         EQUAL); DISJOINT when the two point in opposite directions.
     */
    private static ClockRelationship accumulateRelationship(ClockRelationship current,
            ClockRelationship observed) {
        if (current == ClockRelationship.EQUAL || current == observed)
            return observed;
        return ClockRelationship.DISJOINT;
    }

    /**
     * Determine the count relationship between two contexts.
     *
     * Strategy:
     *  compare node count values (like a version vector).
     *
     * The comparison works on id-sorted copies, so neither argument is
     * modified.
     *
     * @param left
     *            counter context.
     * @param right
     *            counter context.
     * @return the ClockRelationship between the contexts.
     */
    public ClockRelationship diff(byte[] left, byte[] right) {
        // sort copies: a comparison must not reorder the callers' contexts
        left = sortElementsById(left.clone());
        right = sortElementsById(right.clone());

        ClockRelationship relationship = ClockRelationship.EQUAL;

        int leftIndex = HEADER_LENGTH;
        int rightIndex = HEADER_LENGTH;
        while (leftIndex < left.length && rightIndex < right.length) {
            // compare id bytes
            int compareId = FBUtilities.compareByteSubArrays(left, leftIndex, right, rightIndex, idLength);

            ClockRelationship observed;
            if (compareId == 0) {
                long leftCount = FBUtilities.byteArrayToLong(left, leftIndex + idLength);
                long rightCount = FBUtilities.byteArrayToLong(right, rightIndex + idLength);

                // advance indexes
                leftIndex += stepLength;
                rightIndex += stepLength;

                // equal counts carry no information
                if (leftCount == rightCount)
                    continue;

                observed = (leftCount > rightCount)
                        ? ClockRelationship.GREATER_THAN
                        : ClockRelationship.LESS_THAN;
            } else if (compareId > 0) {
                // id only present in the right context: only advance the right context
                rightIndex += stepLength;
                observed = ClockRelationship.LESS_THAN;
            } else {
                // compareId < 0
                // id only present in the left context: only advance the left context
                leftIndex += stepLength;
                observed = ClockRelationship.GREATER_THAN;
            }

            relationship = accumulateRelationship(relationship, observed);
            if (relationship == ClockRelationship.DISJOINT)
                return ClockRelationship.DISJOINT;
        }

        // check final lengths: leftover tuples exist on one side only
        if (leftIndex < left.length)
            return accumulateRelationship(relationship, ClockRelationship.GREATER_THAN);
        if (rightIndex < right.length)
            return accumulateRelationship(relationship, ClockRelationship.LESS_THAN);

        return relationship;
    }

    /**
     * Return a context w/ an aggregated count for each node id.
     *
     * The merged tuples are ordered by count, highest first; ties are
     * ordered by node id, highest first.
     *
     * @param contexts
     *            a list of contexts to be merged
     * @return a newly allocated merged context.
     */
    public byte[] merge(List<byte[]> contexts) {
        // strategy:
        //   1) take highest timestamp
        //   2) take highest delete timestamp
        //   3) map id -> count
        //      a) local id:  sum counts; keep highest timestamp
        //      b) remote id: keep highest count (reconcile)
        //   4) create a context from sorted array
        long highestTimestamp = Long.MIN_VALUE;
        long highestDeleteTimestamp = Long.MIN_VALUE;
        Map<FBUtilities.ByteArrayWrapper, Long> contextsMap = new HashMap<FBUtilities.ByteArrayWrapper, Long>();
        for (byte[] context : contexts) {
            // take highest timestamp
            highestTimestamp = Math.max(FBUtilities.byteArrayToLong(context, 0), highestTimestamp);
            highestDeleteTimestamp = Math.max(FBUtilities.byteArrayToLong(context, TIMESTAMP_LENGTH),
                    highestDeleteTimestamp);

            // map id -> count
            for (int offset = HEADER_LENGTH; offset < context.length; offset += stepLength) {
                FBUtilities.ByteArrayWrapper id = new FBUtilities.ByteArrayWrapper(
                        ArrayUtils.subarray(context, offset, offset + idLength));
                long count = FBUtilities.byteArrayToLong(context, offset + idLength);

                Long previousCount = contextsMap.get(id);
                long mergedCount;
                if (previousCount == null) {
                    // first time this id is seen
                    mergedCount = count;
                } else if (idWrapper.equals(id)) {
                    // local id: sum counts
                    mergedCount = count + previousCount;
                } else {
                    // remote id: keep highest count
                    mergedCount = Math.max(count, previousCount);
                }
                contextsMap.put(id, mergedCount);
            }
        }

        List<Map.Entry<FBUtilities.ByteArrayWrapper, Long>> contextsList = new ArrayList<Map.Entry<FBUtilities.ByteArrayWrapper, Long>>(
                contextsMap.entrySet());
        Collections.sort(contextsList, new Comparator<Map.Entry<FBUtilities.ByteArrayWrapper, Long>>() {
            public int compare(Map.Entry<FBUtilities.ByteArrayWrapper, Long> e1,
                    Map.Entry<FBUtilities.ByteArrayWrapper, Long> e2) {
                // reversed
                int result = e2.getValue().compareTo(e1.getValue());
                if (result != 0)
                    return result;
                return FBUtilities.compareByteArrays(e2.getKey().data, e1.getKey().data);
            }
        });

        int length = contextsList.size();
        byte[] merged = new byte[HEADER_LENGTH + (length * stepLength)];
        FBUtilities.copyIntoBytes(merged, 0, highestTimestamp);
        FBUtilities.copyIntoBytes(merged, TIMESTAMP_LENGTH, highestDeleteTimestamp);
        for (int i = 0; i < length; i++) {
            Map.Entry<FBUtilities.ByteArrayWrapper, Long> entry = contextsList.get(i);
            writeElementAtStepOffset(merged, i, entry.getKey().data, entry.getValue().longValue());
        }
        return merged;
    }

    /**
     * Human-readable String from context.
     *
     * Tuples are printed in node id order.  The ordering is applied to a
     * copy, so the passed-in context is left untouched.
     *
     * @param context
     *            version context.
     * @return a human-readable String of the context.
     */
    public String toString(byte[] context) {
        // sort a copy: formatting must not reorder the caller's context
        context = sortElementsById(context.clone());

        StringBuilder sb = new StringBuilder();
        sb.append("{");
        sb.append(FBUtilities.byteArrayToLong(context, 0));
        sb.append(", ");
        sb.append(FBUtilities.byteArrayToLong(context, TIMESTAMP_LENGTH));
        sb.append(" + [");
        for (int offset = HEADER_LENGTH; offset < context.length; offset += stepLength) {
            if (offset != HEADER_LENGTH) {
                sb.append(",");
            }
            sb.append("(");
            try {
                InetAddress address = InetAddress
                        .getByAddress(ArrayUtils.subarray(context, offset, offset + idLength));
                sb.append(address.getHostAddress());
            } catch (UnknownHostException uhe) {
                // id bytes do not form a valid address; print a placeholder instead
                sb.append("?.?.?.?");
            }
            sb.append(", ");
            sb.append(FBUtilities.byteArrayToLong(context, offset + idLength));
            sb.append(")");
        }
        sb.append("]}");
        return sb.toString();
    }

    /**
     * Sums the counts of every (node id, count) tuple in the context.
     *
     * @param context
     *            counter context.
     * @return the aggregated count, encoded with FBUtilities.toByteArray.
     */
    public byte[] total(byte[] context) {
        long total = 0;

        for (int offset = HEADER_LENGTH; offset < context.length; offset += stepLength) {
            long count = FBUtilities.byteArrayToLong(context, offset + idLength);
            total += count;
        }

        return FBUtilities.toByteArray(total);
    }

    /**
     * Removes the tuple of the given node from the context.
     *
     * NOTE(review): assumes node.getAddress() has the same length as the
     * local address used to size node ids -- confirm for mixed IPv4/IPv6.
     *
     * @param context
     *            counter context (never modified).
     * @param node
     *            node whose count should be dropped.
     * @return a new, shorter context when the node id was present; the
     *         original context otherwise.
     */
    public byte[] cleanNodeCounts(byte[] context, InetAddress node) {
        // calculate node id
        byte[] nodeId = node.getAddress();

        // look for this node id
        for (int offset = HEADER_LENGTH; offset < context.length; offset += stepLength) {
            if (FBUtilities.compareByteSubArrays(context, offset, nodeId, 0, idLength) != 0)
                continue;

            // node id found: remove node count
            byte[] truncatedContext = new byte[context.length - stepLength];
            // everything before the tuple, then everything after it
            System.arraycopy(context, 0, truncatedContext, 0, offset);
            System.arraycopy(context, offset + stepLength, truncatedContext, offset,
                    context.length - (offset + stepLength));
            return truncatedContext;
        }

        return context;
    }
}