org.apache.lens.cube.parse.AbridgedTimeRangeWriter.java Source code

Introduction

Here is the source code for org.apache.lens.cube.parse.AbridgedTimeRangeWriter.java
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lens.cube.parse;

import static java.util.stream.Collectors.toMap;

import java.util.*;

import org.apache.lens.cube.metadata.FactPartition;
import org.apache.lens.server.api.error.LensException;

import org.apache.commons.lang.StringUtils;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

/**
 * Collapses the time range filters using IN operators
 */
public class AbridgedTimeRangeWriter implements TimeRangeWriter {

    /**
     * Return IN clause for the partitions selected in the cube query
     *
     * @param cubeQueryContext cube query context
     * @param tableName        table name
     * @param parts            partitions
     * @return
     * @throws LensException
     */
    @Override
    public String getTimeRangeWhereClause(CubeQueryContext cubeQueryContext, String tableName,
            Set<FactPartition> parts) throws LensException {
        if (parts == null || parts.isEmpty()) {
            return "";
        }
        // Collect partition specs by column in a map
        // All filters which contain only a single column will be combined in an IN operator clause
        // This clause will be ORed with filters which contain multiple columns.
        List<String> subFilters = new ArrayList<String>();
        for (Map.Entry<Set<FactPartition>, Set<FactPartition>> entry : groupPartitions(parts).entrySet()) {
            List<String> clauses = new ArrayList<String>();
            String clause;
            clause = getClause(cubeQueryContext, tableName, entry.getKey());
            if (clause != null && !clause.isEmpty()) {
                clauses.add(clause);
            }
            clause = getClause(cubeQueryContext, tableName, entry.getValue());
            if (clause != null && !clause.isEmpty()) {
                clauses.add(clause);
            }
            subFilters.add("(" + StringUtils.join(clauses, " AND ") + ")");
        }
        return StringUtils.join(subFilters, " OR ");
    }

    private String getClause(CubeQueryContext cubeQueryContext, String tableName, Set<FactPartition> parts)
            throws LensException {
        Map<String, List<String>> partFilterMap = new HashMap<String, List<String>>();
        List<String> allTimeRangeFilters = new ArrayList<String>();

        for (FactPartition factPartition : parts) {
            String filter = TimeRangeUtils.getTimeRangePartitionFilter(factPartition, cubeQueryContext, tableName);
            if (filter.contains("AND")) {
                allTimeRangeFilters.add("(" + filter + ")");
            } else {
                extractColumnAndCondition(filter, partFilterMap);
            }
        }

        List<String> inClauses = new ArrayList<String>(partFilterMap.size());
        for (String column : partFilterMap.keySet()) {
            String clause = "(" + StringUtils.join(partFilterMap.get(column), ",") + ")";
            inClauses.add(column + " IN " + clause);
        }

        allTimeRangeFilters.add(StringUtils.join(inClauses, " AND "));
        return StringUtils.join(allTimeRangeFilters, " OR ");
    }

    /**
     * parts is a collection of FactPartition objects. And FactPartition can be viewed as two boolean conditions, one
     * specified by it's containingPart object, and another specified by itself in the form (partCol = partSpec)
     * <p/>
     * Collection of FactPartition objects can be viewed as an OR clause on all the FactPartition objects -- which by
     * itself is a binary AND clause.
     * <p/>
     * So Collection&lt;FactPartition&gt; is nothing but (a AND b) OR (c AND d) OR (e AND f) ...
     * <p/>
     * This function tries to reduce such a big clause by using Boolean arithmetic. The big thing it aims to reduce is the
     * following class of clauses:
     * <p/>
     * (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) => ((a OR b) AND (c OR d))
     * <p/>
     * Equivalent return value for such a reduction would be an entry in the returned map from set(a,b) to set(c,d).
     * Assuming the argument was set(a(containing=c), a(containing=d), b(containing=c), b(containing=d))
     *
     * @param parts
     * @return
     */
    private Map<Set<FactPartition>, Set<FactPartition>> groupPartitions(Collection<FactPartition> parts) {
        Map<FactPartition, Set<FactPartition>> partitionSetMap = new HashMap<FactPartition, Set<FactPartition>>();
        for (FactPartition part : parts) {
            partitionSetMap.computeIfAbsent(part.getContainingPart(), k -> Sets.newTreeSet())
                    .add(part.withoutContaining());
        }
        Map<Set<FactPartition>, Set<FactPartition>> setSetOppositeMap = Maps.newHashMap();
        for (Map.Entry<FactPartition, Set<FactPartition>> entry : partitionSetMap.entrySet()) {
            setSetOppositeMap.computeIfAbsent(entry.getValue(), k -> Sets.newTreeSet());
            if (entry.getKey() != null) {
                setSetOppositeMap.get(entry.getValue()).add(entry.getKey());
            }
        }
        // inverse again
        return setSetOppositeMap.entrySet().stream().collect(toMap(Map.Entry::getValue, Map.Entry::getKey));
    }

    // This takes the output of filter generated by TimeRangeUtils.getTimeRangePartitionFilter
    // splits the filters by column names and filters are collected by column name in the
    // map passed as argument
    private void extractColumnAndCondition(String token, Map<String, List<String>> partFilterMap) {
        token = token.trim();

        String[] subTokens = StringUtils.split(token, '=');

        String column = subTokens[0].trim();
        String filterValue = subTokens[1].trim();

        partFilterMap.computeIfAbsent(column, k -> new ArrayList<>()).add(filterValue);
    }
}