// Source code (Java tutorial).
// Here is the source code for net.opentsdb.tree.Branch.


// This file is part of OpenTSDB.
// Copyright (C) 2013  The OpenTSDB Authors.
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.  You should have received a copy
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.tree;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;

import javax.xml.bind.DatatypeConverter;

import org.hbase.async.Bytes;
import org.hbase.async.GetRequest;
import org.hbase.async.HBaseException;
import org.hbase.async.KeyValue;
import org.hbase.async.PutRequest;
import org.hbase.async.Scanner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.core.JsonGenerator;
import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;
import com.stumbleupon.async.DeferredGroupException;

import net.opentsdb.core.TSDB;
import net.opentsdb.uid.NoSuchUniqueId;
import net.opentsdb.uid.UniqueId;
import net.opentsdb.utils.JSON;
import net.opentsdb.utils.JSONException;

/**
 * Represents a branch of a meta data tree, used to organize timeseries into 
 * a hierarchy for easy navigation. Each branch is composed of itself and
 * potential child branches and/or child leaves. 
 * <p>
 * Branch IDs are hex encoded byte arrays composed of the tree ID + hash of 
 * the display name for each previous branch. The tree ID is encoded on 
 * {@link Tree#TREE_ID_WIDTH()} bytes, each hash is then {@code INT_WIDTH} 
 * bytes. So if the tree ID width is 2 bytes and Java Integers are 4 bytes, 
 * the root for tree # 1 is just {@code 0001}. A child of the root could be 
 * {@code 0001A3B190C2} and so on. These IDs are used as the row key in storage.
 * <p>
 * Branch definitions are JSON objects stored in the "branch" column of the 
 * branch ID row. Only the tree ID, path and display name are stored in the
 * definition column to keep space down. Leaves are stored in separate columns
 * and child branch definitions are stored in separate rows. Note that the root
 * branch definition for a tree will be stored in the same row as the tree 
 * definition since they share the same row key.
 * <p>
 * When fetching a branch with children and leaves, a scanner is
 * configured with a row key regex to scan any rows that match the branch ID 
 * plus an additional {@code INT_WIDTH} so that when we scan, we can pick up all
 * of the rows with child branch definitions. Also, when loading a full branch, 
 * any leaves for the requested branch can load the associated UID names from 
 * storage, so this can get expensive. Leaves for a child branch will not be 
 * loaded, only leaves that belong directly to the local branch will. Also, 
 * children branches of children will not be loaded. We only return one branch 
 * at a time since the tree could be HUGE!
 * <p>
 * Storing a branch will only write the definition column for the local branch
 * object. Child branches will not be written to storage. If you've loaded
 * and modified children in this branch, you need to loop through the children
 * and store them individually. Leaves belonging to this branch will be stored
 * and collisions recorded to the given Tree object.
 * @since 2.0
 */
@JsonIgnoreProperties(ignoreUnknown = true)
@JsonAutoDetect(fieldVisibility = Visibility.PUBLIC_ONLY)
public final class Branch implements Comparable<Branch> {
    /** Logger shared by every branch instance */
    private static final Logger LOG = LoggerFactory.getLogger(Branch.class);

    /** Charset used to convert Strings to byte arrays and back. */
    private static final Charset CHARSET = Charset.forName("ISO-8859-1");
    /** Integer width in bytes */
    private static final short INT_WIDTH = 4;
    /** Name of the branch qualifier ID */
    private static final byte[] BRANCH_QUALIFIER = "branch".getBytes(CHARSET);

    /** The tree this branch belongs to */
    private int tree_id;

    /** Display name for the branch */
    private String display_name = "";

    /** Leaves belonging to this branch, keyed on each leaf's {@code hashCode()}; null until a leaf is added */
    private HashMap<Integer, Leaf> leaves;

    /** Sorted set of child branches, ordered by {@link #compareTo(Branch)}; null until a child is added */
    private TreeSet<Branch> branches;

    /** The path of the branch: depth mapped to the display name at that depth */
    private TreeMap<Integer, String> path;

    /**
     * Default empty constructor necessary for de/serialization
     */
    public Branch() {
        // nothing to set up: tree_id stays 0, display_name stays "" and the
        // leaf/branch/path collections stay null until they are populated
    }

    /**
     * Constructor that sets the tree ID
     * @param tree_id ID of the tree this branch is associated with
     */
    public Branch(final int tree_id) {
        // stored as-is; the valid range is only enforced when the branch ID is
        // compiled or the branch is stored
        this.tree_id = tree_id;
    }

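    /**
     * Copy constructor that creates a copy of the original with its own leaf,
     * child branch and path collections. The Leaf and Branch elements held by
     * those collections are shared with the original, they are not cloned.
     * @param original The original object to copy from
     */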
    public Branch(final Branch original) {
        tree_id = original.tree_id;
        display_name = original.display_name;
        // each collection is copied so that adding to or removing from the copy
        // never touches the original; a null collection stays null
        if (original.leaves != null) {
            leaves = new HashMap<Integer, Leaf>(original.leaves);
        }
        if (original.branches != null) {
            branches = new TreeSet<Branch>(original.branches);
        }
        if (original.path != null) {
            path = new TreeMap<Integer, String>(original.path);
        }
    }

    /** @return Returns the {@code display_name}'s hash code or 0 if it's not set */
    public int hashCode() {
        if (display_name == null || display_name.isEmpty()) {
            return 0;
        return display_name.hashCode();

    /**
     * Just compares the branch display name
     * @param obj The object to compare this to
     * @return True if the display names are the same or the incoming object is 
     * this one
     */
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        if (this.getClass() != obj.getClass()) {
            return false;
        if (obj == this) {
            return true;

        final Branch branch = (Branch) obj;
        return display_name.equals(branch.display_name);

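    /**
     * Comparator based on the {@code display_name}, ignoring case, used to sort
     * branches when returning them to RPC calls
     * @param branch The branch to compare this one to
     * @return The result of the case insensitive display name comparison
     */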
    public int compareTo(Branch branch) {
        return this.display_name.compareToIgnoreCase(branch.display_name);

    /** @return Information about this branch including ID and display name */
    public String toString() {
        if (path == null) {
            return "Name: [" + display_name + "]";
        } else {
            return "ID: [" + getBranchId() + "] Name: [" + display_name + "]";

    /**
     * Adds a child branch to the local branch set if it doesn't exist. Also
     * initializes the set if it hasn't been initialized yet
     * @param branch The branch to add
     * @return True if the branch did not exist in the set previously
     * @throws IllegalArgumentException if the incoming branch is null
     */
    public boolean addChild(final Branch branch) {
        if (branch == null) {
            throw new IllegalArgumentException("Null branches are not allowed");
        if (branches == null) {
            branches = new TreeSet<Branch>();
            return true;

        if (branches.contains(branch)) {
            return false;
        return true;

    /**
     * Adds a leaf to the local branch, looking for collisions
     * @param leaf The leaf to add
     * @param tree The tree to report to with collisions
     * @return True if the leaf was new, false if the leaf already exists or 
     * would cause a collision
     * @throws IllegalArgumentException if the incoming leaf is null
     */
    public boolean addLeaf(final Leaf leaf, final Tree tree) {
        if (leaf == null) {
            throw new IllegalArgumentException("Null leaves are not allowed");
        if (leaves == null) {
            leaves = new HashMap<Integer, Leaf>();
            leaves.put(leaf.hashCode(), leaf);
            return true;

        if (leaves.containsKey(leaf.hashCode())) {
            // if we try to sync a leaf with the same hash of an existing key
            // but a different TSUID, it's a collision, so mark it
            if (!leaves.get(leaf.hashCode()).getTsuid().equals(leaf.getTsuid())) {
                final Leaf collision = leaves.get(leaf.hashCode());
                if (tree != null) {
                    tree.addCollision(leaf.getTsuid(), collision.getTsuid());

                // log at info or lower since it's not a system error, rather it's
                // a user issue with the rules or naming schema
                LOG.warn("Incoming TSUID [" + leaf.getTsuid() + "] collided with existing TSUID ["
                        + collision.getTsuid() + "] on display name [" + collision.getDisplayName() + "]");
            return false;
        } else {
            leaves.put(leaf.hashCode(), leaf);
            return true;

    /**
     * Attempts to compile the branch ID for this branch. In order to successfully
     * compile, the {@code tree_id}, {@code path} and {@code display_name} must
     * be set. The path may be empty, which indicates this is a root branch, but
     * it must be a valid Map object. If the display name is not already the last
     * entry of the path, it is appended to the path as a side effect.
     * @return The branch ID as a byte array
     * @throws IllegalArgumentException if any required parameters are missing
     */
    public byte[] compileBranchId() {
        if (tree_id < 1 || tree_id > 65535) {
            throw new IllegalArgumentException("Missing or invalid tree ID");
        // root branch path may be empty
        if (path == null) {
            throw new IllegalArgumentException("Missing branch path");
        if (display_name == null || display_name.isEmpty()) {
            throw new IllegalArgumentException("Missing display name");

        // first, make sure the display name is at the tip of the tree set
        if (path.isEmpty()) {
            path.put(0, display_name);
        } else if (!path.lastEntry().getValue().equals(display_name)) {
            final int depth = path.lastEntry().getKey() + 1;
            path.put(depth, display_name);

        final byte[] branch_id = new byte[Tree.TREE_ID_WIDTH() + ((path.size() - 1) * INT_WIDTH)];
        int index = 0;
        final byte[] tree_bytes = Tree.idToBytes(tree_id);
        System.arraycopy(tree_bytes, 0, branch_id, index, tree_bytes.length);
        index += tree_bytes.length;

        for (Map.Entry<Integer, String> entry : path.entrySet()) {
            // skip the root, keeps the row keys 4 bytes shorter
            if (entry.getKey() == 0) {

            final byte[] hash = Bytes.fromInt(entry.getValue().hashCode());
            System.arraycopy(hash, 0, branch_id, index, hash.length);
            index += hash.length;

        return branch_id;

    /**
     * Sets the path for this branch based off the path of the parent. This map
     * may be empty, in which case the branch is considered a root.
     * <b>Warning:</b> If the path has already been set, this will create a new
     * path, clearing out any existing entries
     * @param parent_path The map to store as the path
     * @throws IllegalArgumentException if the parent path is null
     */
    public void prependParentPath(final Map<Integer, String> parent_path) {
        if (parent_path == null) {
            throw new IllegalArgumentException("Parent path was null");
        path = new TreeMap<Integer, String>();

    /**
     * Attempts to write the branch definition and optionally child leaves to
     * storage via CompareAndSets.
     * Each returned result will be a boolean regarding whether the CAS call 
     * was successful or not. This will be a mix of the branch call and leaves.
     * Some of these may be false, which is OK, because if the branch
     * definition already exists, we don't need to re-write it. Leaves will
     * return false if there was a collision.
     * @param tsdb The TSDB to use for access
     * @param tree The tree to record collisions to
     * @param store_leaves Whether or not child leaves should be written to
     * storage
     * @return A deferred to wait on for completion that resolves to the list of
     * CAS results.
     * @throws HBaseException if there was an issue
     * @throws IllegalArgumentException if the tree ID was missing or data was 
     * missing
     */
    public Deferred<ArrayList<Boolean>> storeBranch(final TSDB tsdb, final Tree tree, final boolean store_leaves) {
        if (tree_id < 1 || tree_id > 65535) {
            throw new IllegalArgumentException("Missing or invalid tree ID");

        final ArrayList<Deferred<Boolean>> storage_results = new ArrayList<Deferred<Boolean>>(
                leaves != null ? leaves.size() + 1 : 1);

        // compile the row key by making sure the display_name is in the path set
        // row ID = <treeID>[<parent.display_name.hashCode()>...]
        final byte[] row = this.compileBranchId();

        // compile the object for storage, this will toss exceptions if we are
        // missing anything important
        final byte[] storage_data = toStorageJson();

        final PutRequest put = new PutRequest(tsdb.treeTable(), row, Tree.TREE_FAMILY(), BRANCH_QUALIFIER,
        storage_results.add(tsdb.getClient().compareAndSet(put, new byte[0]));

        // store leaves if told to and put the storage calls in our deferred group
        if (store_leaves && leaves != null && !leaves.isEmpty()) {
            for (final Leaf leaf : leaves.values()) {
                storage_results.add(leaf.storeLeaf(tsdb, row, tree));


    /**
     * Attempts to fetch only the branch definition object from storage. This is
     * much faster than scanning many rows for child branches as per the 
     * {@link #fetchBranch} call. Useful when building trees, particularly to
     * fetch the root branch.
     * @param tsdb The TSDB to use for access
     * @param branch_id ID of the branch to retrieve
     * @return A branch if found, null if it did not exist
     * @throws JSONException if the object could not be deserialized
     */
    public static Deferred<Branch> fetchBranchOnly(final TSDB tsdb, final byte[] branch_id) {

        final GetRequest get = new GetRequest(tsdb.treeTable(), branch_id);;

         * Called after the get returns with or without data. If we have data, we'll
         * parse the branch and return it.
        final class GetCB implements Callback<Deferred<Branch>, ArrayList<KeyValue>> {

            public Deferred<Branch> call(ArrayList<KeyValue> row) throws Exception {
                if (row == null || row.isEmpty()) {
                    return Deferred.fromResult(null);

                final Branch branch = JSON.parseToObject(row.get(0).value(), Branch.class);

                // WARNING: Since the json doesn't store the tree ID, to cut down on
                // space, we have to load it from the row key.
                branch.tree_id = Tree.bytesToId(row.get(0).key());
                return Deferred.fromResult(branch);


        return tsdb.getClient().get(get).addCallbackDeferring(new GetCB());

    /**
     * Attempts to fetch the branch, its leaves and all child branches.
     * The UID names for each leaf may also be loaded if configured.
     * @param tsdb The TSDB to use for storage access
     * @param branch_id ID of the branch to retrieve
     * @param load_leaf_uids Whether or not to load UID names for each leaf
     * @return A branch if found, null if it did not exist
     * @throws JSONException if the object could not be deserialized
     */
    public static Deferred<Branch> fetchBranch(final TSDB tsdb, final byte[] branch_id,
            final boolean load_leaf_uids) {

        final Deferred<Branch> result = new Deferred<Branch>();
        final Scanner scanner = setupBranchScanner(tsdb, branch_id);

        // This is the branch that will be loaded with data from the scanner and
        // returned at the end of the process.
        final Branch branch = new Branch();

        // A list of deferreds to wait on for child leaf processing
        final ArrayList<Deferred<Object>> leaf_group = new ArrayList<Deferred<Object>>();

         * Exception handler to catch leaves with an invalid UID name due to a 
         * possible deletion. This will allow the scanner to keep loading valid
         * leaves and ignore problems. The fsck tool can be used to clean up
         * orphaned leaves. If we catch something other than an NSU, it will
         * re-throw the exception
        final class LeafErrBack implements Callback<Object, Exception> {

            final byte[] qualifier;

            public LeafErrBack(final byte[] qualifier) {
                this.qualifier = qualifier;

            public Object call(final Exception e) throws Exception {
                Throwable ex = e;
                while (ex.getClass().equals(DeferredGroupException.class)) {
                    ex = ex.getCause();
                if (ex.getClass().equals(NoSuchUniqueId.class)) {
                    LOG.debug("Invalid UID for leaf: " + idToString(qualifier) + " in branch: "
                            + idToString(branch_id), ex);
                } else {
                    throw (Exception) ex;
                return null;


         * Called after a leaf has been loaded successfully and adds the leaf
         * to the branch's leaf set. Also lazily initializes the leaf set if it 
         * hasn't been.
        final class LeafCB implements Callback<Object, Leaf> {

            public Object call(final Leaf leaf) throws Exception {
                if (leaf != null) {
                    if (branch.leaves == null) {
                        branch.leaves = new HashMap<Integer, Leaf>();
                    branch.leaves.put(leaf.hashCode(), leaf);
                return null;


         * Scanner callback executed recursively each time we get a set of data
         * from storage. This is responsible for determining what columns are 
         * returned and issuing requests to load leaf objects.
         * When the scanner returns a null set of rows, the method initiates the
         * final callback.
        final class FetchBranchCB implements Callback<Object, ArrayList<ArrayList<KeyValue>>> {

             * Starts the scanner and is called recursively to fetch the next set of
             * rows from the scanner.
             * @return The branch if loaded successfully, null if the branch was not
             * found.
            public Object fetchBranch() {
                return scanner.nextRows().addCallback(this);

             * Loops through each row of the scanner results and parses out branch
             * definitions and child leaves.
             * @return The final branch callback if the scanner returns a null set
            public Object call(final ArrayList<ArrayList<KeyValue>> rows) throws Exception {
                if (rows == null) {
                    if (branch.tree_id < 1 || branch.path == null) {
                    } else {
                    return null;

                for (final ArrayList<KeyValue> row : rows) {
                    for (KeyValue column : row) {

                        // matched a branch column
                        if (Bytes.equals(BRANCH_QUALIFIER, column.qualifier())) {
                            if (Bytes.equals(branch_id, column.key())) {

                                // it's *this* branch. We deserialize to a new object and copy
                                // since the columns could be in any order and we may get a 
                                // leaf before the branch
                                final Branch local_branch = JSON.parseToObject(column.value(), Branch.class);
                                branch.path = local_branch.path;
                                branch.display_name = local_branch.display_name;
                                branch.tree_id = Tree.bytesToId(column.key());

                            } else {
                                // it's a child branch
                                final Branch child = JSON.parseToObject(column.value(), Branch.class);
                                child.tree_id = Tree.bytesToId(column.key());
                            // parse out a leaf
                        } else if (Bytes.memcmp(Leaf.LEAF_PREFIX(), column.qualifier(), 0,
                                Leaf.LEAF_PREFIX().length) == 0) {
                            if (Bytes.equals(branch_id, column.key())) {
                                // process a leaf and skip if the UIDs for the TSUID can't be 
                                // found. Add an errback to catch NoSuchUniqueId exceptions
                                leaf_group.add(Leaf.parseFromStorage(tsdb, column, load_leaf_uids)
                                        .addCallbacks(new LeafCB(), new LeafErrBack(column.qualifier())));
                            } else {
                                // TODO - figure out an efficient way to increment a counter in 
                                // the child branch with the # of leaves it has

                // recursively call ourself to fetch more results from the scanner
                return fetchBranch();

        // start scanning
        new FetchBranchCB().fetchBranch();
        return result;

    /**
     * Converts a branch ID hash to a hex encoded, upper case string with padding
     * @param branch_id The ID to convert
     * @return the branch ID as a character hex string
     */
    public static String idToString(final byte[] branch_id) {
        return DatatypeConverter.printHexBinary(branch_id);

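    /**
     * Converts a hex string to a branch ID byte array (row key). Strings with an
     * odd number of characters are left padded with a single zero.
     * @param branch_id The branch ID to convert
     * @return The branch ID as a byte array
     * @throws IllegalArgumentException if the string is empty, shorter than 4
     * characters or not valid hex
     */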
    public static byte[] stringToId(final String branch_id) {
        if (branch_id == null || branch_id.isEmpty()) {
            throw new IllegalArgumentException("Branch ID was empty");
        if (branch_id.length() < 4) {
            throw new IllegalArgumentException("Branch ID was too short");
        String id = branch_id;
        if (id.length() % 2 != 0) {
            id = "0" + id;
        return DatatypeConverter.parseHexBinary(id);

    /** @return The branch column qualifier name */
    public static byte[] BRANCH_QUALIFIER() {
        return BRANCH_QUALIFIER;

    /**
     * Returns serialized data for the branch to put in storage. This is necessary
     * to reduce storage space and for proper CAS calls. Only the path and the
     * display name are written.
     * @return A byte array for storage
     */
    private byte[] toStorageJson() {
        // grab some memory to avoid reallocs
        final ByteArrayOutputStream output = new ByteArrayOutputStream(
                (display_name.length() * 2) + (path.size() * 128));
        try {
            final JsonGenerator json = JSON.getFactory().createGenerator(output);


            // we only need to write a small amount of information
            json.writeObjectField("path", path);
            json.writeStringField("displayName", display_name);


            // TODO zero copy?
            return output.toByteArray();
        } catch (IOException e) {
            throw new RuntimeException(e);

    /**
     * Configures an HBase scanner to fetch the requested branch and all child
     * branches. It uses a row key regex filter to match any rows starting with
     * the given branch and another INT_WIDTH bytes deep. Deeper branches are
     * ignored.
     * @param tsdb The TSDB to use for storage access
     * @param branch_id ID of the branch to fetch
     * @return An HBase scanner ready for scanning
     */
    private static Scanner setupBranchScanner(final TSDB tsdb, final byte[] branch_id) {
        final byte[] start = branch_id;
        final byte[] end = Arrays.copyOf(branch_id, branch_id.length);
        final Scanner scanner = tsdb.getClient().newScanner(tsdb.treeTable());

        // increment the tree ID so we scan the whole tree
        byte[] tree_id = new byte[INT_WIDTH];
        for (int i = 0; i < Tree.TREE_ID_WIDTH(); i++) {
            tree_id[i + (INT_WIDTH - Tree.TREE_ID_WIDTH())] = end[i];
        int id = Bytes.getInt(tree_id) + 1;
        tree_id = Bytes.fromInt(id);
        for (int i = 0; i < Tree.TREE_ID_WIDTH(); i++) {
            end[i] = tree_id[i + (INT_WIDTH - Tree.TREE_ID_WIDTH())];

        // TODO - use the column filter to fetch only branches and leaves, ignore
        // collisions, no matches and other meta

        // set the regex filter
        // we want one branch below the current ID so we want something like:
        // {0, 1, 1, 2, 3, 4 }  where { 0, 1 } is the tree ID, { 1, 2, 3, 4 } is the 
        // branch
        // "^\\Q\000\001\001\002\003\004\\E(?:.{4})$"

        final StringBuilder buf = new StringBuilder((start.length * 6) + 20);
        buf.append("(?s)" // Ensure we use the DOTALL flag.
                + "^\\Q");
        for (final byte b : start) {
            buf.append((char) (b & 0xFF));

        scanner.setKeyRegexp(buf.toString(), CHARSET);
        return scanner;

    // GETTERS AND SETTERS ----------------------------

    /** @return The ID of the tree this branch belongs to */
    public int getTreeId() {
        return tree_id;

    /** @return The ID of this branch */
    public String getBranchId() {
        final byte[] id = compileBranchId();
        if (id == null) {
            return null;
        return UniqueId.uidToString(id);

    /** @return The path of the tree */
    public Map<Integer, String> getPath() {
        return path;

    /** @return Depth of this branch */
    public int getDepth() {
        return path.lastKey();

    /** @return Name to display to the public */
    public String getDisplayName() {
        return display_name;

    /** @return Ordered set of leaves belonging to this branch */
    public TreeSet<Leaf> getLeaves() {
        if (leaves == null) {
            return null;
        return new TreeSet<Leaf>(leaves.values());

    /** @return Ordered set of child branches */
    public TreeSet<Branch> getBranches() {
        return branches;

    /** @param tree_id ID of the tree this branch belongs to */
    public void setTreeId(int tree_id) {
        this.tree_id = tree_id;

    /** @param display_name Public name to display */
    public void setDisplayName(String display_name) {
        this.display_name = display_name;
