org.apache.druid.query.aggregation.datasketches.theta.SketchHolder.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.druid.query.aggregation.datasketches.theta.SketchHolder.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.theta;

import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Doubles;
import com.google.common.primitives.Longs;
import com.yahoo.memory.Memory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.theta.AnotB;
import com.yahoo.sketches.theta.Intersection;
import com.yahoo.sketches.theta.SetOperation;
import com.yahoo.sketches.theta.Sketch;
import com.yahoo.sketches.theta.Sketches;
import com.yahoo.sketches.theta.Union;
import org.apache.commons.codec.binary.Base64;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;

import java.util.Arrays;
import java.util.Comparator;

/**
 * Wrapper around the three representations a theta sketch can take here: a materialized
 * {@link Sketch}, a {@link Union} that may still be accumulating updates, or a {@link Memory}
 * region containing a serialized sketch.
 *
 * <p>The materialized {@link Sketch} and its estimate are computed lazily and cached. The cache is
 * dropped through {@link #invalidateCache()} when {@link #combine(Object, Object, int)} updates a
 * wrapped {@link Union} in place.
 *
 * <p>NOTE(review): the cache fields are volatile and are read through locals, but updating a
 * wrapped {@link Union} is not synchronized here; callers presumably confine a holder that is
 * being combined to one thread - confirm before sharing instances across threads.
 */
public class SketchHolder {
    /** Holder wrapping the compact form of a freshly built update sketch that received no updates. */
    public static final SketchHolder EMPTY = SketchHolder
            .of(Sketches.updateSketchBuilder().build().compact(true, null));

    /**
     * Orders {@link SketchHolder} instances, with {@code null} sorting first.
     *
     * <p>Holders backed by a {@link Sketch} or {@link Union} sort before holders backed by
     * {@link Memory}. Two sketch/union-backed holders are ordered by estimate; two memory-backed
     * holders are ordered by {@code MEMORY_COMPARATOR}. Both arguments must be
     * {@link SketchHolder} instances, otherwise the cast fails with a {@link ClassCastException}.
     */
    public static final Comparator<Object> COMPARATOR = Ordering.from(new Comparator<Object>() {
        @Override
        public int compare(Object o1, Object o2) {
            SketchHolder h1 = (SketchHolder) o1;
            SketchHolder h2 = (SketchHolder) o2;

            if (h1.obj instanceof Sketch || h1.obj instanceof Union) {
                if (h2.obj instanceof Sketch || h2.obj instanceof Union) {
                    return SKETCH_COMPARATOR.compare(h1.getSketch(), h2.getSketch());
                } else {
                    return -1;
                }
            }

            if (h1.obj instanceof Memory) {
                if (h2.obj instanceof Memory) {
                    return MEMORY_COMPARATOR.compare((Memory) h1.obj, (Memory) h2.obj);
                } else {
                    return 1;
                }
            }

            // The constructor only admits Sketch, Union and Memory, so this is a defensive guard.
            throw new IAE("Unknwon types [%s] and [%s]", h1.obj.getClass().getName(), h2.obj.getClass().getName());
        }
    }).nullsFirst();

    /** Orders sketches by their estimate. */
    private static final Comparator<Sketch> SKETCH_COMPARATOR = new Comparator<Sketch>() {
        @Override
        public int compare(Sketch o1, Sketch o2) {
            return Doubles.compare(o1.getEstimate(), o2.getEstimate());
        }
    };

    /** Orders serialized sketches by capacity, then by the last 8 bytes read as a long. */
    private static final Comparator<Memory> MEMORY_COMPARATOR = new Comparator<Memory>() {
        @Override
        public int compare(Memory o1, Memory o2) {
            // We have two Ordered Compact sketches, so just compare their last entry if they have the
            // same size. This is to produce a deterministic ordering, though it might not match the
            // actual estimate ordering, but that's ok because this comparator is only used by
            // GenericIndexed.
            int retVal = Longs.compare(o1.getCapacity(), o2.getCapacity());
            if (retVal == 0) {
                // Capacities are equal on this branch; each region is indexed by its own capacity.
                retVal = Longs.compare(o1.getLong(o1.getCapacity() - 8), o2.getLong(o2.getCapacity() - 8));
            }

            return retVal;
        }
    };

    /** The wrapped representation: always a {@link Sketch}, {@link Union} or {@link Memory}. */
    private final Object obj;

    /** Lazily computed estimate of the wrapped sketch; {@code null} until computed or after invalidation. */
    private volatile Double cachedEstimate = null;
    /** Lazily materialized sketch; {@code null} until computed or after invalidation. */
    private volatile Sketch cachedSketch = null;

    /**
     * @param obj the representation to wrap
     * @throws IllegalArgumentException if {@code obj} is {@code null} or is not a {@link Sketch},
     *                                  {@link Union} or {@link Memory}
     */
    private SketchHolder(Object obj) {
        // Resolve the type name null-safely so a null argument fails the precondition below instead
        // of throwing a NullPointerException while the message arguments are being evaluated.
        final String typeName = obj == null ? "null" : obj.getClass().getName();
        Preconditions.checkArgument(obj instanceof Sketch || obj instanceof Union || obj instanceof Memory,
                "unknown sketch representation type [%s]", typeName);
        this.obj = obj;
    }

    /**
     * Wraps the given representation in a new holder.
     *
     * @param obj a {@link Sketch}, {@link Union} or {@link Memory}
     * @return a new holder around {@code obj}
     * @throws IllegalArgumentException if {@code obj} is of any other type or {@code null}
     */
    public static SketchHolder of(Object obj) {
        return new SketchHolder(obj);
    }

    /**
     * Feeds the wrapped sketch into the given union.
     *
     * <p>A {@link Memory}-backed holder is passed to the union as memory, without first being turned
     * into a {@link Sketch}; every other holder contributes the result of {@link #getSketch()}.
     *
     * @param union the union to update
     */
    public void updateUnion(Union union) {
        if (obj instanceof Memory) {
            union.update((Memory) obj);
        } else {
            union.update(getSketch());
        }
    }

    /**
     * Returns the wrapped data as a {@link Sketch}, materializing and caching it on first use.
     *
     * @return the sketch for the wrapped representation, never {@code null}
     * @throws ISE if the wrapped object is of an unsupported type
     */
    public Sketch getSketch() {
        // Read the volatile field once: re-reading it after the null check could observe a
        // concurrent invalidateCache() and hand null back to the caller.
        Sketch sketch = cachedSketch;
        if (sketch != null) {
            return sketch;
        }

        if (obj instanceof Sketch) {
            sketch = (Sketch) obj;
        } else if (obj instanceof Union) {
            sketch = ((Union) obj).getResult();
        } else if (obj instanceof Memory) {
            sketch = deserializeFromMemory((Memory) obj);
        } else {
            throw new ISE("Can't get sketch from object of type [%s]", obj.getClass().getName());
        }
        cachedSketch = sketch;
        return sketch;
    }

    /**
     * Returns the estimate of the wrapped sketch, computing and caching it on first use.
     *
     * @return the value of {@code getSketch().getEstimate()}
     */
    public double getEstimate() {
        // Same single-read discipline as getSketch(): never dereference the field after the check.
        Double estimate = cachedEstimate;
        if (estimate == null) {
            estimate = getSketch().getEstimate();
            cachedEstimate = estimate;
        }
        return estimate.doubleValue();
    }

    /**
     * Returns the estimate together with the sketch's upper and lower bounds.
     *
     * @param errorBoundsStdDev number of standard deviations passed to the sketch's
     *                          {@code getUpperBound} / {@code getLowerBound}
     * @return estimate, upper bound, lower bound and the requested number of standard deviations
     */
    public SketchEstimateWithErrorBounds getEstimateWithErrorBounds(int errorBoundsStdDev) {
        final Sketch sketch = getSketch();
        return new SketchEstimateWithErrorBounds(getEstimate(),
                sketch.getUpperBound(errorBoundsStdDev), sketch.getLowerBound(errorBoundsStdDev),
                errorBoundsStdDev);
    }

    /**
     * Unions two holders.
     *
     * <p>If either argument already wraps a {@link Union}, that union is updated in place with the
     * other holder, its cache is invalidated, and that same holder is returned ({@code o1} is
     * preferred when both wrap a union). Otherwise a new union is built with {@code nomEntries}
     * nominal entries and returned in a new holder.
     *
     * @param o1         first operand; must be a {@link SketchHolder}
     * @param o2         second operand; must be a {@link SketchHolder}
     * @param nomEntries nominal entries for the union created when neither operand wraps one
     * @return a holder wrapping the union of both operands
     */
    public static SketchHolder combine(Object o1, Object o2, int nomEntries) {
        SketchHolder holder1 = (SketchHolder) o1;
        SketchHolder holder2 = (SketchHolder) o2;

        if (holder1.obj instanceof Union) {
            Union union = (Union) holder1.obj;
            holder2.updateUnion(union);
            holder1.invalidateCache();
            return holder1;
        } else if (holder2.obj instanceof Union) {
            Union union = (Union) holder2.obj;
            holder1.updateUnion(union);
            holder2.invalidateCache();
            return holder2;
        } else {
            Union union = (Union) SetOperation.builder().setNominalEntries(nomEntries).build(Family.UNION);
            holder1.updateUnion(union);
            holder2.updateUnion(union);
            return SketchHolder.of(union);
        }
    }

    /** Drops the cached sketch and estimate so they are recomputed on next access. */
    void invalidateCache() {
        cachedEstimate = null;
        cachedSketch = null;
    }

    /**
     * Converts a serialized or already materialized sketch into a holder.
     *
     * <p>A {@link String} is treated as Base64 text and a {@code byte[]} as raw sketch bytes; a
     * {@link SketchHolder} is returned unchanged; a {@link Sketch}, {@link Union} or
     * {@link Memory} is wrapped directly.
     *
     * @param serializedSketch the object to convert
     * @return a holder for the given object
     * @throws ISE if the object is {@code null} or of an unsupported type
     */
    public static SketchHolder deserialize(Object serializedSketch) {
        if (serializedSketch instanceof String) {
            return SketchHolder.of(deserializeFromBase64EncodedString((String) serializedSketch));
        } else if (serializedSketch instanceof byte[]) {
            return SketchHolder.of(deserializeFromByteArray((byte[]) serializedSketch));
        } else if (serializedSketch instanceof SketchHolder) {
            return (SketchHolder) serializedSketch;
        } else if (serializedSketch instanceof Sketch || serializedSketch instanceof Union
                || serializedSketch instanceof Memory) {
            return SketchHolder.of(serializedSketch);
        }

        // Null-safe so that a null input reports the intended ISE rather than a NullPointerException.
        final Object offendingType = serializedSketch == null ? null : serializedSketch.getClass();
        throw new ISE("Object is not of a type[%s] that can be deserialized to sketch.",
                offendingType);
    }

    /** Decodes Base64 text (read as UTF-8 bytes) and deserializes the resulting bytes. */
    private static Sketch deserializeFromBase64EncodedString(String str) {
        return deserializeFromByteArray(Base64.decodeBase64(StringUtils.toUtf8(str)));
    }

    /** Wraps the byte array as {@link Memory} and deserializes it. */
    private static Sketch deserializeFromByteArray(byte[] data) {
        return deserializeFromMemory(Memory.wrap(data));
    }

    /**
     * Turns serialized sketch memory into a {@link Sketch}: images with a serialization version
     * below 3 are heapified, all others are wrapped.
     */
    private static Sketch deserializeFromMemory(Memory mem) {
        if (Sketch.getSerializationVersion(mem) < 3) {
            return Sketches.heapifySketch(mem);
        } else {
            return Sketches.wrapSketch(mem);
        }
    }

    /** Set operations supported by {@link #sketchSetOperation(Func, int, Object...)}. */
    public enum Func {
        UNION, INTERSECT, NOT
    }

    /**
     * Applies a set operation across the given holders.
     *
     * <p>{@code UNION} and {@code INTERSECT} fold every holder into a single operation.
     * {@code NOT} computes {@code holders[0]} A-not-B each following holder in turn; with exactly
     * one holder that holder is returned unchanged.
     *
     * @param func       the operation to apply
     * @param sketchSize nominal entries used when building the set operation
     * @param holders    operands; each must be a {@link SketchHolder}
     * @return a holder wrapping the result
     * @throws IllegalArgumentException if {@code func} is {@code NOT} and no holders are given, or
     *                                  if {@code func} is not a known operation
     */
    public static SketchHolder sketchSetOperation(Func func, int sketchSize, Object... holders) {
        // Results are requested with getResult(false, null): "false" yields an unordered sketch,
        // which is faster to build. An ordered sketch ("true") is slower to compute and only pays
        // off by being faster to union later; since this method is used in the final stages of
        // query processing, an ordered sketch would be of no use.
        switch (func) {
        case UNION:
            Union union = (Union) SetOperation.builder().setNominalEntries(sketchSize).build(Family.UNION);
            for (Object o : holders) {
                ((SketchHolder) o).updateUnion(union);
            }
            return SketchHolder.of(union);
        case INTERSECT:
            Intersection intersection = (Intersection) SetOperation.builder().setNominalEntries(sketchSize)
                    .build(Family.INTERSECTION);
            for (Object o : holders) {
                intersection.update(((SketchHolder) o).getSketch());
            }
            return SketchHolder.of(intersection.getResult(false, null));
        case NOT:
            if (holders.length < 1) {
                throw new IllegalArgumentException("A-Not-B requires atleast 1 sketch");
            }

            if (holders.length == 1) {
                return (SketchHolder) holders[0];
            }

            Sketch result = ((SketchHolder) holders[0]).getSketch();
            for (int i = 1; i < holders.length; i++) {
                // A fresh AnotB is built per step; the running result is the "A" side each time.
                AnotB anotb = (AnotB) SetOperation.builder().setNominalEntries(sketchSize).build(Family.A_NOT_B);
                anotb.update(result, ((SketchHolder) holders[i]).getSketch());
                result = anotb.getResult(false, null);
            }
            return SketchHolder.of(result);
        default:
            throw new IllegalArgumentException("Unknown sketch operation " + func);
        }
    }

    /**
     *  Ideally this would use Sketch's own equals and hashCode methods, but those are not value
     *  based implementations, and SketchHolder needs value based equals and hashCode.
     *  Hence the serialized bytes are compared with Arrays.equals() and hashed with
     *  Arrays.hashCode().
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        return Arrays.equals(this.getSketch().toByteArray(), ((SketchHolder) o).getSketch().toByteArray());
    }

    /** Hash derived from the serialized bytes of the sketch, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return 31 * Arrays.hashCode(this.getSketch().toByteArray());
    }
}