org.apache.kylin.cube.cuboid.algorithm.CuboidStats.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.kylin.cube.cuboid.algorithm.CuboidStats.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.cube.cuboid.algorithm;

import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

/**
 * Statistics about a set of cuboids: row count, size, hit probability and expected scan count
 * per cuboid, together with the split of those cuboids into a mandatory set and a selection set.
 *
 * <p>Instances are created through {@link Builder}; the constructor is private.
 */
public class CuboidStats {
    // Class-wide SLF4J logger.
    private static final Logger logger = LoggerFactory.getLogger(CuboidStats.class);

    /**
     * Fluent builder for {@link CuboidStats}.
     *
     * <p>The key, base cuboid id, row-count statistics and size map are required and are validated
     * in {@link #build()}; every other input is optional.
     */
    public static class Builder {

        /** Roll-up threshold applied when a rolling-up count map is supplied without a threshold. */
        private static final long THRESHOLD_ROLL_UP_FOR_MANDATORY = 1000L;

        // Required parameters
        private final String key;
        private final Long baseCuboid;
        private final Map<Long, Long> statistics;
        private final Map<Long, Double> size;

        // Optional parameters - initialized to default values
        private Set<Long> mandatoryCuboids = null;
        //// These two properties are for generating mandatory cuboids
        private Map<Long, Map<Long, Long>> rollingUpCountSourceMap = null;
        private Long rollUpThresholdForMandatory = null;

        private Map<Long, Long> hitFrequencyMap = null;
        private Map<Long, Map<Long, Long>> scanCountSourceMap = null;

        /**
         * Starts a builder with the required inputs. Nothing is validated here; see {@link #build()}.
         *
         * @param key        identifier stored on the resulting {@link CuboidStats}
         * @param baseCuboid id of the base cuboid; must have an entry in {@code statistics}
         * @param statistics row count per cuboid id
         * @param size       size per cuboid id; must have exactly the same key set as {@code statistics}
         */
        public Builder(String key, Long baseCuboid, Map<Long, Long> statistics, Map<Long, Double> size) {
            this.key = key;
            this.baseCuboid = baseCuboid;
            this.statistics = statistics;
            this.size = size;
        }

        /**
         * Supplies the rolling-up counts used to generate additional mandatory cuboids, using the
         * default threshold of {@value #THRESHOLD_ROLL_UP_FOR_MANDATORY}.
         *
         * @param rollingUpCountSourceMap rolling-up counts handed to
         *                                {@code CuboidStatsUtil.generateMandatoryCuboidSet}
         * @return this builder
         */
        public Builder setRollingUpCountSourceMap(Map<Long, Map<Long, Long>> rollingUpCountSourceMap) {
            this.rollingUpCountSourceMap = rollingUpCountSourceMap;
            this.rollUpThresholdForMandatory = THRESHOLD_ROLL_UP_FOR_MANDATORY;
            return this;
        }

        /**
         * Supplies the rolling-up counts used to generate additional mandatory cuboids together with
         * an explicit threshold.
         *
         * @param rollingUpCountSourceMap     rolling-up counts handed to
         *                                    {@code CuboidStatsUtil.generateMandatoryCuboidSet}
         * @param rollUpThresholdForMandatory threshold handed to the same helper
         * @return this builder
         */
        public Builder setRollingUpCountSourceMap(Map<Long, Map<Long, Long>> rollingUpCountSourceMap,
                long rollUpThresholdForMandatory) {
            this.rollingUpCountSourceMap = rollingUpCountSourceMap;
            this.rollUpThresholdForMandatory = rollUpThresholdForMandatory;
            return this;
        }

        /**
         * Sets the cuboids that must always be treated as mandatory. The supplied set is only read:
         * {@link #build()} copies it before adding generated cuboids.
         *
         * @param mandatoryCuboids explicit mandatory cuboid ids, may be {@code null}
         * @return this builder
         */
        public Builder setMandatoryCuboids(Set<Long> mandatoryCuboids) {
            this.mandatoryCuboids = mandatoryCuboids;
            return this;
        }

        /**
         * Sets the recorded hit frequency per cuboid id, used to derive hit probabilities.
         *
         * @param hitFrequencyMap hit frequency per cuboid id, may be {@code null}
         * @return this builder
         */
        public Builder setHitFrequencyMap(Map<Long, Long> hitFrequencyMap) {
            this.hitFrequencyMap = hitFrequencyMap;
            return this;
        }

        /**
         * Sets the recorded scan counts, keyed by source cuboid and then by target cuboid, used to
         * derive the expected scan count of each cuboid.
         *
         * @param scanCountSourceMap scan counts per source cuboid and target cuboid, may be {@code null}
         * @return this builder
         */
        public Builder setScanCountSourceMap(Map<Long, Map<Long, Long>> scanCountSourceMap) {
            this.scanCountSourceMap = scanCountSourceMap;
            return this;
        }

        /**
         * Validates the required inputs and creates the {@link CuboidStats}.
         *
         * @return the new statistics object
         * @throws NullPointerException  if a required input is {@code null} or {@code statistics} has
         *                               no row count for the base cuboid
         * @throws IllegalStateException if {@code statistics} and {@code size} have different key sets
         */
        public CuboidStats build() {
            Preconditions.checkNotNull(key, "key should not be null");
            Preconditions.checkNotNull(baseCuboid, "baseCuboid should not be null");
            Preconditions.checkNotNull(statistics, "statistics should not be null");
            Preconditions.checkNotNull(size, "size should not be null");
            Preconditions.checkNotNull(statistics.get(baseCuboid),
                    "row count should exist for base cuboid " + baseCuboid);
            Preconditions.checkState(statistics.keySet().equals(size.keySet()),
                    "statistics & size should own the same key set");

            // Work on a private copy so that the caller's set is never modified (it may be immutable
            // or shared) and so that calling build() twice starts from the same state each time.
            Set<Long> allMandatoryCuboids = Sets.newHashSet();
            if (mandatoryCuboids != null) {
                allMandatoryCuboids.addAll(mandatoryCuboids);
            }
            if (rollingUpCountSourceMap != null) {
                allMandatoryCuboids.addAll(CuboidStatsUtil.generateMandatoryCuboidSet(statistics, hitFrequencyMap,
                        rollingUpCountSourceMap, rollUpThresholdForMandatory));
            }

            return new CuboidStats(key, baseCuboid, allMandatoryCuboids, statistics, size, hitFrequencyMap,
                    scanCountSourceMap);
        }
    }

    /**
     * Share of the total hit probability that is spread evenly over all selection cuboids,
     * regardless of their recorded hit frequency.
     */
    private static final double WEIGHT_FOR_UN_QUERY = 0.2;

    // All state is assigned exactly once in the constructor, hence final.
    private final String key;
    private final long baseCuboid;
    // Mandatory and selection sets never overlap; the base cuboid is always mandatory.
    private final ImmutableSet<Long> mandatoryCuboidSet;
    private final ImmutableSet<Long> selectionCuboidSet;
    // Row count and size per cuboid id.
    private final ImmutableMap<Long, Long> cuboidCountMap;
    private final ImmutableMap<Long, Double> cuboidSizeMap;
    // Hit probability per selection cuboid.
    private final ImmutableMap<Long, Double> cuboidHitProbabilityMap;
    // Expected scan count for the base cuboid and for each selection cuboid.
    private final ImmutableMap<Long, Long> cuboidScanCountMap;

    // Direct children per cuboid, as produced by CuboidStatsUtil.createDirectChildrenCache.
    private final ImmutableMap<Long, List<Long>> directChildrenCache;
    // Filled on demand by getAllDescendants(long); the map itself is a concurrent map.
    private final Map<Long, Set<Long>> allDescendantsCache;

    /**
     * Derives all per-cuboid statistics from the raw inputs collected by the builder.
     *
     * @param key                identifier of this statistics object
     * @param baseCuboidId       id of the base cuboid; always treated as mandatory
     * @param mandatoryCuboids   cuboids that are mandatory in addition to the base cuboid (copied, not modified)
     * @param statistics         row count per cuboid id; passed as-is to
     *                           {@code CuboidStatsUtil.complementRowCountForMandatoryCuboids}
     * @param size               size per cuboid id
     * @param hitFrequencyMap    recorded hit frequency per cuboid id, or {@code null} for a uniform
     *                           hit probability over the selection cuboids
     * @param scanCountSourceMap recorded scan counts per source and target cuboid, or {@code null}
     */
    private CuboidStats(String key, long baseCuboidId, Set<Long> mandatoryCuboids, Map<Long, Long> statistics,
            Map<Long, Double> size, Map<Long, Long> hitFrequencyMap,
            Map<Long, Map<Long, Long>> scanCountSourceMap) {

        this.key = key;
        this.baseCuboid = baseCuboidId;

        // Mandatory cuboids: the requested ones plus the base cuboid (add() is a no-op if present).
        Set<Long> cuboidsForMandatory = Sets.newHashSet(mandatoryCuboids);
        cuboidsForMandatory.add(baseCuboid);
        logger.info("Mandatory cuboids: {}", cuboidsForMandatory);

        // Selection cuboids: every cuboid with statistics that is not mandatory.
        Set<Long> cuboidsForSelection = Sets.newHashSet(statistics.keySet());
        cuboidsForSelection.removeAll(cuboidsForMandatory);

        // There's no overlap between mandatoryCuboidSet and selectionCuboidSet
        this.mandatoryCuboidSet = ImmutableSet.copyOf(cuboidsForMandatory);
        this.selectionCuboidSet = ImmutableSet.copyOf(cuboidsForSelection);
        if (selectionCuboidSet.isEmpty()) {
            logger.warn("The selection set should not be empty!!!");
        }

        // Initialize row count for mandatory cuboids.
        // NOTE(review): the helper receives the caller's statistics map itself, so anything it adds
        // is also visible to the caller - confirm that this is intended.
        CuboidStatsUtil.complementRowCountForMandatoryCuboids(statistics, baseCuboid, mandatoryCuboidSet);

        this.cuboidCountMap = ImmutableMap.copyOf(statistics);
        this.cuboidSizeMap = ImmutableMap.copyOf(size);

        // Initialize the hit probability for each selection cuboid
        final int selectionCount = selectionCuboidSet.size();
        Map<Long, Double> tmpCuboidHitProbabilityMap = Maps.newHashMapWithExpectedSize(selectionCount);
        if (hitFrequencyMap != null) {
            long totalHitFrequency = 0L;
            for (Map.Entry<Long, Long> hitFrequency : hitFrequencyMap.entrySet()) {
                if (selectionCuboidSet.contains(hitFrequency.getKey())) {
                    totalHitFrequency += hitFrequency.getValue();
                }
            }

            // Every selection cuboid gets an equal slice of WEIGHT_FOR_UN_QUERY; the remainder is
            // distributed proportionally to the recorded hits.
            final double unitUncertainProb = WEIGHT_FOR_UN_QUERY / selectionCount;
            for (Long cuboid : selectionCuboidSet) {
                Long hits = hitFrequencyMap.get(cuboid);
                double hitShare = 0.0;
                // A zero total means there is nothing to distribute; dividing by it would store NaN.
                if (hits != null && totalHitFrequency != 0L) {
                    hitShare = (1 - WEIGHT_FOR_UN_QUERY) * hits / totalHitFrequency;
                }
                tmpCuboidHitProbabilityMap.put(cuboid, unitUncertainProb + hitShare);
            }
        } else {
            // No hit information at all: uniform probability.
            for (Long cuboid : selectionCuboidSet) {
                tmpCuboidHitProbabilityMap.put(cuboid, 1.0 / selectionCount);
            }
        }
        this.cuboidHitProbabilityMap = ImmutableMap.copyOf(tmpCuboidHitProbabilityMap);

        // Initialize the scan count when query for each selection cuboid + one base cuboid
        Map<Long, Long> tmpCuboidScanCountMap = Maps.newHashMapWithExpectedSize(1 + selectionCount);
        tmpCuboidScanCountMap.put(baseCuboid, getExpScanCount(baseCuboid, statistics, scanCountSourceMap));
        for (Long cuboid : selectionCuboidSet) {
            tmpCuboidScanCountMap.put(cuboid, getExpScanCount(cuboid, statistics, scanCountSourceMap));
        }
        this.cuboidScanCountMap = ImmutableMap.copyOf(tmpCuboidScanCountMap);

        this.directChildrenCache = ImmutableMap.<Long, List<Long>>builder()
                .putAll(CuboidStatsUtil.createDirectChildrenCache(statistics.keySet())).build();

        this.allDescendantsCache = Maps.newConcurrentMap();
    }

    /**
     * Estimates how many rows a query served by {@code sourceCuboid} scans.
     *
     * <p>Without recorded scan counts for the source cuboid the estimate is its row count.
     * Otherwise each recorded (target cuboid, scan count) pair is scaled by the ratio of source row
     * count to target row count, and the scaled values are averaged.
     *
     * <p>Targets whose row count is 0 provide no usable ratio and are left out of the average; if
     * none is usable the source row count is returned.
     *
     * @param sourceCuboid       cuboid to estimate for
     * @param statistics         row count per cuboid id; must contain the source and every target
     * @param scanCountSourceMap recorded scan counts per source and target cuboid, may be {@code null}
     * @return the estimated scan count
     * @throws NullPointerException if {@code statistics} lacks the source or a target cuboid
     */
    private long getExpScanCount(long sourceCuboid, Map<Long, Long> statistics,
            Map<Long, Map<Long, Long>> scanCountSourceMap) {
        final Long sourceRowCount = statistics.get(sourceCuboid);
        Preconditions.checkNotNull(sourceRowCount,
                "The statistics for source cuboid " + sourceCuboid + " does not exist!!!");

        final Map<Long, Long> scanCountTargetMap = scanCountSourceMap == null ? null
                : scanCountSourceMap.get(sourceCuboid);
        if (scanCountTargetMap == null || scanCountTargetMap.isEmpty()) {
            return sourceRowCount;
        }

        //TODO some improvement can be done by assigning weights based on distance between source cuboid and target cuboid
        long totalEstScanCount = 0L;
        int usableTargets = 0;
        for (Map.Entry<Long, Long> subEntry : scanCountTargetMap.entrySet()) {
            long targetCuboid = subEntry.getKey();
            final Long targetRowCount = statistics.get(targetCuboid);
            Preconditions.checkNotNull(targetRowCount,
                    "The statistics for target cuboid " + targetCuboid + " does not exist!!!");
            if (targetRowCount == 0L) {
                // Dividing by an empty target would throw ArithmeticException.
                continue;
            }
            // Consider the ratio of row count between source cuboid and target cuboid
            totalEstScanCount += scaleScanCount(subEntry.getValue(), sourceRowCount, targetRowCount);
            usableTargets++;
        }
        return usableTargets == 0 ? sourceRowCount : totalEstScanCount / usableTargets;
    }

    /**
     * Computes {@code scanCount * sourceRowCount / targetRowCount}. The result is exact whenever the
     * product fits in a {@code long}; otherwise floating-point arithmetic is used so the estimate
     * does not wrap around. Counts are assumed to be non-negative and {@code targetRowCount} non-zero.
     */
    private static long scaleScanCount(long scanCount, long sourceRowCount, long targetRowCount) {
        final long product = scanCount * sourceRowCount;
        final boolean overflowed = sourceRowCount != 0L && product / sourceRowCount != scanCount;
        if (overflowed) {
            return (long) ((double) scanCount * (double) sourceRowCount / (double) targetRowCount);
        }
        return product / targetRowCount;
    }

    /**
     * Returns the given cuboid together with everything reachable from it through the direct
     * children cache, in depth-first discovery order.
     *
     * <p>Results are computed once per selection cuboid and then served from a cache; the returned
     * set is the cached instance itself. For a cuboid outside the selection set a new empty set is
     * returned and nothing is cached.
     *
     * @param cuboid cuboid id to expand
     * @return the cuboid and its descendants, or an empty set if the cuboid is not a selection cuboid
     */
    public Set<Long> getAllDescendants(long cuboid) {
        if (!selectionCuboidSet.contains(cuboid)) {
            return Sets.<Long>newLinkedHashSet();
        }

        Set<Long> cached = allDescendantsCache.get(cuboid);
        if (cached != null) {
            return cached;
        }

        Set<Long> collected = Sets.newLinkedHashSet();
        getAllDescendants(cuboid, collected);
        allDescendantsCache.put(cuboid, collected);
        return collected;
    }

    /**
     * Depth-first helper: records {@code cuboid} in {@code allDescendants} and then recurses into
     * each of its direct children. A cuboid that is already recorded is not expanded again.
     *
     * @param cuboid         cuboid to visit
     * @param allDescendants accumulator that receives every visited cuboid
     */
    private void getAllDescendants(long cuboid, Set<Long> allDescendants) {
        // Set.add reports whether the element was newly inserted, i.e. whether this is the first visit.
        boolean firstVisit = allDescendants.add(cuboid);
        if (firstVisit) {
            for (Long child : directChildrenCache.get(cuboid)) {
                getAllDescendants(child, allDescendants);
            }
        }
    }

    /**
     * @return the immutable set of cuboids that have statistics and are not mandatory
     */
    public Set<Long> getAllCuboidsForSelection() {
        return this.selectionCuboidSet;
    }

    /**
     * @return the immutable set of mandatory cuboids, which always contains the base cuboid
     */
    public Set<Long> getAllCuboidsForMandatory() {
        return this.mandatoryCuboidSet;
    }

    /**
     * Looks up the expected scan count of a cuboid.
     *
     * @param cuboid cuboid id
     * @return the expected scan count, or {@code null} if none was computed for this cuboid
     */
    public Long getCuboidQueryCost(long cuboid) {
        final Long expectedScanCount = this.cuboidScanCountMap.get(cuboid);
        return expectedScanCount;
    }

    /**
     * Looks up the row count of a cuboid.
     *
     * @param cuboid cuboid id
     * @return the row count, or {@code null} if the cuboid has no statistics
     */
    public Long getCuboidCount(long cuboid) {
        final Long rowCount = this.cuboidCountMap.get(cuboid);
        return rowCount;
    }

    /**
     * Looks up the size of a cuboid.
     *
     * @param cuboid cuboid id
     * @return the size, or {@code null} if no size is known for the cuboid
     */
    public Double getCuboidSize(long cuboid) {
        final Double cuboidSize = this.cuboidSizeMap.get(cuboid);
        return cuboidSize;
    }

    /**
     * Returns the probability that a query hits the given cuboid.
     *
     * @param cuboid cuboid id
     * @return 1 for a mandatory cuboid, the computed probability for a selection cuboid, and 0 for
     *         any other cuboid
     */
    public double getCuboidHitProbability(long cuboid) {
        if (mandatoryCuboidSet.contains(cuboid)) {
            return 1;
        }
        Double probability = cuboidHitProbabilityMap.get(cuboid);
        if (probability == null) {
            return 0;
        }
        return probability;
    }

    /**
     * @return the immutable map from cuboid id to row count
     */
    public Map<Long, Long> getStatistics() {
        return this.cuboidCountMap;
    }

    /**
     * @return the size recorded for the base cuboid
     * @throws NullPointerException if no size is recorded for the base cuboid
     */
    public double getBaseCuboidSize() {
        final Double baseSize = getCuboidSize(baseCuboid);
        // Unboxing happens here, exactly as when returning the lookup result directly.
        return baseSize;
    }

    /**
     * @return the id of the base cuboid
     */
    public long getBaseCuboid() {
        return this.baseCuboid;
    }

    /**
     * @return the key this statistics object was built with
     */
    public String getKey() {
        return this.key;
    }

    /**
     * Bundles row count, size, hit probability and query cost of a cuboid into a
     * {@code CuboidBenefitModel.CuboidModel}.
     *
     * @param cuboid cuboid id
     * @return a new model populated from this object's lookups for the cuboid
     */
    public CuboidBenefitModel.CuboidModel getCuboidModel(long cuboid) {
        // Same lookup order and static types as passing the calls directly as arguments.
        Long rowCount = getCuboidCount(cuboid);
        Double cuboidSize = getCuboidSize(cuboid);
        double hitProbability = getCuboidHitProbability(cuboid);
        Long queryCost = getCuboidQueryCost(cuboid);
        return new CuboidBenefitModel.CuboidModel(cuboid, rowCount, cuboidSize, hitProbability, queryCost);
    }
}