Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.kylin.cube.model; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import org.apache.commons.codec.binary.Base64; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.KylinConfigExt; import org.apache.kylin.common.KylinVersion; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; import org.apache.kylin.common.util.Array; import org.apache.kylin.common.util.JsonUtil; import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.cuboid.CuboidScheduler; import org.apache.kylin.measure.MeasureType; import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType; import org.apache.kylin.metadata.MetadataConstants; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.ColumnDesc; import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.IEngineAware; import org.apache.kylin.metadata.model.IStorageAware; import org.apache.kylin.metadata.model.JoinDesc; import org.apache.kylin.metadata.model.JoinTableDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.metadata.project.ProjectInstance; import org.apache.kylin.metadata.project.ProjectManager; import org.apache.kylin.metadata.realization.RealizationType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; /** */ @SuppressWarnings("serial") @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) public class CubeDesc extends RootPersistentEntity implements IEngineAware { private static final Logger logger = LoggerFactory.getLogger(CubeDesc.class); public static class CannotFilterExtendedColumnException extends RuntimeException { public CannotFilterExtendedColumnException(TblColRef tblColRef) { super(tblColRef == null ? "null" : tblColRef.getCanonicalName()); } } public enum DeriveType implements java.io.Serializable { LOOKUP, PK_FK, EXTENDED_COLUMN } public static class DeriveInfo implements java.io.Serializable { public DeriveType type; public JoinDesc join; public TblColRef[] columns; public boolean isOneToOne; // only used when ref from derived to host DeriveInfo(DeriveType type, JoinDesc join, TblColRef[] columns, boolean isOneToOne) { this.type = type; this.join = join; this.columns = columns; this.isOneToOne = isOneToOne; } @Override public String toString() { return "DeriveInfo [type=" + type + ", join=" + join + ", columns=" + Arrays.toString(columns) + ", isOneToOne=" + isOneToOne + "]"; } } private KylinConfigExt config; private DataModelDesc model; @JsonProperty("name") private String name; @JsonProperty("is_draft") private boolean isDraft; @JsonProperty("model_name") private String modelName; @JsonProperty("description") private String description; @JsonProperty("null_string") private String[] nullStrings; @JsonProperty("dimensions") private List<DimensionDesc> dimensions; @JsonProperty("measures") private List<MeasureDesc> measures; @JsonProperty("dictionaries") @JsonInclude(JsonInclude.Include.NON_NULL) private List<DictionaryDesc> dictionaries; @JsonProperty("rowkey") private RowKeyDesc rowkey; @JsonProperty("hbase_mapping") private HBaseMappingDesc hbaseMapping; @JsonProperty("aggregation_groups") private List<AggregationGroup> aggregationGroups; @JsonProperty("signature") private String signature; @JsonProperty("notify_list") private List<String> notifyList; @JsonProperty("status_need_notify") private List<String> statusNeedNotify = Collections.emptyList(); @JsonProperty("partition_date_start") private long partitionDateStart = 0L; @JsonProperty("partition_date_end") private long partitionDateEnd = 3153600000000L; @JsonProperty("auto_merge_time_ranges") private long[] autoMergeTimeRanges; @JsonProperty("retention_range") private long retentionRange = 0; @JsonProperty("engine_type") private int engineType = IEngineAware.ID_MR_V1; @JsonProperty("storage_type") private int storageType = IStorageAware.ID_HBASE; @JsonProperty("override_kylin_properties") private LinkedHashMap<String, String> overrideKylinProps = new LinkedHashMap<String, String>(); private LinkedHashSet<TblColRef> allColumns = new LinkedHashSet<>(); private LinkedHashSet<ColumnDesc> allColumnDescs = new LinkedHashSet<>(); private LinkedHashSet<TblColRef> dimensionColumns = new LinkedHashSet<>(); private Map<TblColRef, DeriveInfo> derivedToHostMap = Maps.newHashMap(); private Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedMap = Maps.newHashMap(); private Map<TblColRef, DeriveInfo> extendedColumnToHosts = Maps.newHashMap(); @JsonProperty("partition_offset_start") @JsonInclude(JsonInclude.Include.NON_EMPTY) private Map<Integer, Long> partitionOffsetStart = Maps.newHashMap(); @JsonProperty("cuboid_black_list") @JsonInclude(JsonInclude.Include.NON_NULL) private Set<Long> cuboidBlackSet = Sets.newHashSet(); @JsonProperty("parent_forward") @JsonInclude(JsonInclude.Include.NON_NULL) private int parentForward = 3; // allCuboids and parent2Child lazy built private Set<Long> allCuboids; private Map<Long, List<Long>> parent2Child; private byte[] cuboidTreeLock = new byte[0]; public boolean isEnableSharding() { //in the future may extend to other storage that is shard-able return storageType != IStorageAware.ID_HBASE && storageType != IStorageAware.ID_HYBRID; } public Set<TblColRef> getShardByColumns() { return getRowkey().getShardByColumns(); } /** * Error messages during resolving json metadata */ private List<String> errors = new ArrayList<String>(); /** * @return all columns this cube can support, including derived */ public Set<TblColRef> listAllColumns() { return allColumns == null ? null : Collections.unmodifiableSet(allColumns); } public Set<ColumnDesc> listAllColumnDescs() { return allColumnDescs == null ? null : Collections.unmodifiableSet(allColumnDescs); } /** * @return dimension columns including derived, BUT NOT measures */ public Set<TblColRef> listDimensionColumnsIncludingDerived() { return dimensionColumns == null ? null : Collections.unmodifiableSet(dimensionColumns); } /** * @return dimension columns excluding derived */ public List<TblColRef> listDimensionColumnsExcludingDerived(boolean alsoExcludeExtendedCol) { List<TblColRef> result = new ArrayList<TblColRef>(); for (TblColRef col : dimensionColumns) { if (isDerived(col)) { continue; } if (alsoExcludeExtendedCol && isExtendedColumn(col)) { continue; } result.add(col); } return result; } /** * @return all functions from each measure. */ public List<FunctionDesc> listAllFunctions() { List<FunctionDesc> functions = new ArrayList<FunctionDesc>(); for (MeasureDesc m : measures) { functions.add(m.getFunction()); } return functions; } public TblColRef findColumnRef(String table, String column) { return model.findColumn(table, column); } public DimensionDesc findDimensionByTable(String lookupTableName) { lookupTableName = lookupTableName.toUpperCase(); for (DimensionDesc dim : dimensions) if (dim.getTableRef() != null && dim.getTableRef().getTableIdentity().equals(lookupTableName)) return dim; return null; } public boolean hasHostColumn(TblColRef col) { return isDerived(col) || isExtendedColumn(col); } public boolean isDerived(TblColRef col) { return derivedToHostMap.containsKey(col); } public boolean isExtendedColumn(TblColRef col) { return extendedColumnToHosts.containsKey(col); } public DeriveInfo getHostInfo(TblColRef derived) { if (isDerived(derived)) { return derivedToHostMap.get(derived); } else if (isExtendedColumn(derived)) { return extendedColumnToHosts.get(derived); } throw new RuntimeException("Cannot get host info for " + derived); } public Map<Array<TblColRef>, List<DeriveInfo>> getHostToDerivedInfo(List<TblColRef> rowCols, Collection<TblColRef> wantedCols) { Map<Array<TblColRef>, List<DeriveInfo>> result = new HashMap<Array<TblColRef>, List<DeriveInfo>>(); for (Entry<Array<TblColRef>, List<DeriveInfo>> entry : hostToDerivedMap.entrySet()) { Array<TblColRef> hostCols = entry.getKey(); boolean hostOnRow = rowCols.containsAll(Arrays.asList(hostCols.data)); if (!hostOnRow) continue; List<DeriveInfo> wantedInfo = new ArrayList<DeriveInfo>(); for (DeriveInfo info : entry.getValue()) { if (wantedCols == null || Collections.disjoint(wantedCols, Arrays.asList(info.columns)) == false) // has any wanted columns? wantedInfo.add(info); } if (wantedInfo.size() > 0) result.put(hostCols, wantedInfo); } return result; } public String getResourcePath() { return concatResourcePath(name); } public static String concatResourcePath(String descName) { return ResourceStore.CUBE_DESC_RESOURCE_ROOT + "/" + descName + MetadataConstants.FILE_SURFIX; } // ============================================================================ public KylinConfig getConfig() { return config; } private void setConfig(KylinConfigExt config) { this.config = config; } public String getName() { return name; } public void setName(String name) { this.name = name; } public boolean isDraft() { return isDraft; } public void setDraft(boolean isDraft) { this.isDraft = isDraft; } public String getModelName() { return modelName; } public void setModelName(String modelName) { this.modelName = modelName; } public DataModelDesc getModel() { return model; } public void setModel(DataModelDesc model) { this.model = model; } public String getDescription() { return description; } public void setDescription(String description) { this.description = description; } public String[] getNullStrings() { return nullStrings; } public List<DimensionDesc> getDimensions() { return dimensions == null ? null : Collections.unmodifiableList(dimensions); } public void setDimensions(List<DimensionDesc> dimensions) { this.dimensions = dimensions; } public List<MeasureDesc> getMeasures() { return measures == null ? null : Collections.unmodifiableList(measures); } public void setMeasures(List<MeasureDesc> measures) { this.measures = measures; } public List<DictionaryDesc> getDictionaries() { return dictionaries == null ? null : Collections.unmodifiableList(dictionaries); } public void setDictionaries(List<DictionaryDesc> dictionaries) { this.dictionaries = dictionaries; } public RowKeyDesc getRowkey() { return rowkey; } public void setRowkey(RowKeyDesc rowkey) { this.rowkey = rowkey; } public List<AggregationGroup> getAggregationGroups() { return aggregationGroups == null ? null : Collections.unmodifiableList(aggregationGroups); } public void setAggregationGroups(List<AggregationGroup> aggregationGroups) { this.aggregationGroups = aggregationGroups; } public String getSignature() { return signature; } public void setSignature(String signature) { this.signature = signature; } public List<String> getNotifyList() { return notifyList == null ? null : Collections.unmodifiableList(notifyList); } public void setNotifyList(List<String> notifyList) { this.notifyList = notifyList; } public List<String> getStatusNeedNotify() { return statusNeedNotify == null ? null : Collections.unmodifiableList(statusNeedNotify); } public void setStatusNeedNotify(List<String> statusNeedNotify) { this.statusNeedNotify = statusNeedNotify; } public LinkedHashMap<String, String> getOverrideKylinProps() { return overrideKylinProps; } private void setOverrideKylinProps(LinkedHashMap<String, String> overrideKylinProps) { this.overrideKylinProps = overrideKylinProps; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; CubeDesc cubeDesc = (CubeDesc) o; if (!name.equals(cubeDesc.name)) return false; if (!modelName.equals(cubeDesc.modelName)) return false; return true; } /** * Get cuboid level count except base cuboid * @return */ public int getBuildLevel() { return new CuboidScheduler(this).getCuboidsByLayer().size() - 1; } @Override public int hashCode() { int result = 0; result = 31 * result + name.hashCode(); result = 31 * result + model.getRootFactTable().hashCode(); return result; } @Override public String toString() { return "CubeDesc [name=" + name + "]"; } /** * this method is to prevent malicious metadata change by checking the saved signature * with the calculated signature. * <p> * if you're comparing two cube descs, prefer to use consistentWith() * * @return */ public boolean checkSignature() { if (this.getConfig().isIgnoreCubeSignatureInconsistency()) { logger.info("Skip checking cube signature"); return true; } KylinVersion cubeVersion = new KylinVersion(getVersion()); KylinVersion kylinVersion = KylinVersion.getCurrentVersion(); if (!kylinVersion.isCompatibleWith(cubeVersion)) { logger.info("checkSignature on {} is skipped as the its version {} is different from kylin version {}", getName(), cubeVersion, kylinVersion); return true; } if (kylinVersion.isCompatibleWith(cubeVersion) && !kylinVersion.isSignatureCompatibleWith(cubeVersion)) { logger.info( "checkSignature on {} is skipped as the its version is {} (not signature compatible but compatible) ", getName(), cubeVersion); return true; } if (StringUtils.isBlank(getSignature())) { return true; } String calculated = calculateSignature(); String saved = getSignature(); return calculated.equals(saved); } public boolean consistentWith(CubeDesc another) { if (another == null) return false; return this.calculateSignature().equals(another.calculateSignature()); } public String calculateSignature() { MessageDigest md; try { md = MessageDigest.getInstance("MD5"); StringBuilder sigString = new StringBuilder(); sigString.append(this.name).append("|")// .append(JsonUtil.writeValueAsString(this.modelName)).append("|")// .append(JsonUtil.writeValueAsString(this.nullStrings)).append("|")// .append(JsonUtil.writeValueAsString(this.dimensions)).append("|")// .append(JsonUtil.writeValueAsString(this.measures)).append("|")// .append(JsonUtil.writeValueAsString(this.rowkey)).append("|")// .append(JsonUtil.writeValueAsString(this.aggregationGroups)).append("|")// .append(JsonUtil.writeValueAsString(this.hbaseMapping)).append("|")// .append(JsonUtil.writeValueAsString(this.engineType)).append("|")// .append(JsonUtil.writeValueAsString(this.storageType)).append("|"); String signatureInput = sigString.toString().replaceAll("\\s+", "").toLowerCase(); byte[] signature = md.digest(signatureInput.getBytes()); String ret = new String(Base64.encodeBase64(signature)); return ret; } catch (NoSuchAlgorithmException | JsonProcessingException e) { throw new RuntimeException("Failed to calculate signature"); } } public void deInit() { config = null; model = null; allColumns = new LinkedHashSet<>(); allColumnDescs = new LinkedHashSet<>(); dimensionColumns = new LinkedHashSet<>(); derivedToHostMap = Maps.newHashMap(); hostToDerivedMap = Maps.newHashMap(); extendedColumnToHosts = Maps.newHashMap(); cuboidBlackSet = Sets.newHashSet(); synchronized (cuboidTreeLock) { allCuboids = null; parent2Child = null; } } public void init(KylinConfig config) { this.errors.clear(); checkArgument(StringUtils.isNotBlank(name), "CubeDesc name is blank"); checkArgument(StringUtils.isNotBlank(modelName), "CubeDesc (%s) has blank model name", name); // note CubeDesc.name == CubeInstance.name List<ProjectInstance> ownerPrj = ProjectManager.getInstance(config).findProjects(RealizationType.CUBE, name); // cube inherit the project override props if (ownerPrj.size() == 1) { Map<String, String> prjOverrideProps = ownerPrj.get(0).getOverrideKylinProps(); for (Entry<String, String> entry : prjOverrideProps.entrySet()) { if (!overrideKylinProps.containsKey(entry.getKey())) { overrideKylinProps.put(entry.getKey(), entry.getValue()); } } } this.config = KylinConfigExt.createInstance(config, overrideKylinProps); this.model = MetadataManager.getInstance(config).getDataModelDesc(modelName); checkNotNull(this.model, "DateModelDesc(%s) not found", modelName); for (DimensionDesc dim : dimensions) { dim.init(this); } initDimensionColumns(); initMeasureColumns(); rowkey.init(this); for (AggregationGroup agg : this.aggregationGroups) { agg.init(this, rowkey); } validateAggregationGroups(); // check if aggregation group is valid validateAggregationGroupsCombination(); if (hbaseMapping != null) { hbaseMapping.init(this); } initMeasureReferenceToColumnFamily(); // check all dimension columns are presented on rowkey List<TblColRef> dimCols = listDimensionColumnsExcludingDerived(true); checkState(rowkey.getRowKeyColumns().length == dimCols.size(), "RowKey columns count (%s) doesn't match dimensions columns count (%s)", rowkey.getRowKeyColumns().length, dimCols.size()); initDictionaryDesc(); amendAllColumns(); } private void buildCuboidTree() { synchronized (cuboidTreeLock) { if (allCuboids == null || parent2Child == null) { Pair<Set<Long>, Map<Long, List<Long>>> ret = new CuboidScheduler(this).buildTreeBottomUp(); allCuboids = ret.getFirst(); parent2Child = ret.getSecond(); } } } public boolean isBlackedCuboid(long cuboidID) { return cuboidBlackSet.contains(cuboidID); } public void validateAggregationGroupsCombination() { int index = 0; for (AggregationGroup agg : getAggregationGroups()) { long combination = 0L; try { combination = agg.calculateCuboidCombination(); } catch (Exception ex) { combination = config.getCubeAggrGroupMaxCombination() + 1; } finally { if (combination > config.getCubeAggrGroupMaxCombination()) { String msg = "Aggregation group " + index + " has too many combinations, use 'mandatory'/'hierarchy'/'joint' to optimize; or update 'kylin.cube.aggrgroup.max-combination' to a bigger value."; logger.error("Aggregation group " + index + " has " + combination + " combinations;"); logger.error(msg); throw new IllegalStateException(msg); } } index++; } } public void validateAggregationGroups() { int index = 0; for (AggregationGroup agg : getAggregationGroups()) { if (agg.getIncludes() == null) { logger.error("Aggregation group " + index + " 'includes' field not set"); throw new IllegalStateException("Aggregation group " + index + " includes field not set"); } if (agg.getSelectRule() == null) { logger.error("Aggregation group " + index + " 'select_rule' field not set"); throw new IllegalStateException("Aggregation group " + index + " select rule field not set"); } Set<String> includeDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); getDims(includeDims, agg.getIncludes()); Set<String> mandatoryDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); getDims(mandatoryDims, agg.getSelectRule().mandatoryDims); ArrayList<Set<String>> hierarchyDimsList = Lists.newArrayList(); Set<String> hierarchyDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); getDims(hierarchyDimsList, hierarchyDims, agg.getSelectRule().hierarchyDims); ArrayList<Set<String>> jointDimsList = Lists.newArrayList(); Set<String> jointDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); getDims(jointDimsList, jointDims, agg.getSelectRule().jointDims); if (!includeDims.containsAll(mandatoryDims) || !includeDims.containsAll(hierarchyDims) || !includeDims.containsAll(jointDims)) { List<String> notIncluded = Lists.newArrayList(); final Iterable<String> all = Iterables .unmodifiableIterable(Iterables.concat(mandatoryDims, hierarchyDims, jointDims)); for (String dim : all) { if (includeDims.contains(dim) == false) { notIncluded.add(dim); } } Collections.sort(notIncluded); logger.error("Aggregation group " + index + " Include dimensions not containing all the used dimensions"); throw new IllegalStateException("Aggregation group " + index + " 'includes' dimensions not include all the dimensions:" + notIncluded.toString()); } if (CollectionUtils.containsAny(mandatoryDims, hierarchyDims)) { logger.warn( "Aggregation group " + index + " mandatory dimensions overlap with hierarchy dimensions: " + ensureOrder(CollectionUtils.intersection(mandatoryDims, hierarchyDims))); } if (CollectionUtils.containsAny(mandatoryDims, jointDims)) { logger.warn("Aggregation group " + index + " mandatory dimensions overlap with joint dimensions: " + ensureOrder(CollectionUtils.intersection(mandatoryDims, jointDims))); } if (CollectionUtils.containsAny(hierarchyDims, jointDims)) { logger.error("Aggregation group " + index + " hierarchy dimensions overlap with joint dimensions"); throw new IllegalStateException( "Aggregation group " + index + " hierarchy dimensions overlap with joint dimensions: " + ensureOrder(CollectionUtils.intersection(hierarchyDims, jointDims))); } if (hasSingleOrNone(hierarchyDimsList)) { logger.error("Aggregation group " + index + " require at least 2 dimensions in a hierarchy"); throw new IllegalStateException( "Aggregation group " + index + " require at least 2 dimensions in a hierarchy."); } if (hasSingleOrNone(jointDimsList)) { logger.error("Aggregation group " + index + " require at least 2 dimensions in a joint"); throw new IllegalStateException( "Aggregation group " + index + " require at least 2 dimensions in a joint"); } Pair<Boolean, Set<String>> overlap = hasOverlap(hierarchyDimsList, hierarchyDims); if (overlap.getFirst() == true) { logger.error("Aggregation group " + index + " a dimension exist in more than one hierarchy: " + ensureOrder(overlap.getSecond())); throw new IllegalStateException("Aggregation group " + index + " a dimension exist in more than one hierarchy: " + ensureOrder(overlap.getSecond())); } overlap = hasOverlap(jointDimsList, jointDims); if (overlap.getFirst() == true) { logger.error("Aggregation group " + index + " a dimension exist in more than one joint: " + ensureOrder(overlap.getSecond())); throw new IllegalStateException("Aggregation group " + index + " a dimension exist in more than one joint: " + ensureOrder(overlap.getSecond())); } index++; } } private void getDims(Set<String> dims, String[] stringSet) { if (stringSet != null) { for (String str : stringSet) { dims.add(str); } } } private void getDims(ArrayList<Set<String>> dimsList, Set<String> dims, String[][] stringSets) { if (stringSets != null) { for (String[] ss : stringSets) { Set<String> temp = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); for (String s : ss) { temp.add(s); dims.add(s); } dimsList.add(temp); } } } private boolean hasSingleOrNone(ArrayList<Set<String>> dimsList) { boolean hasSingleOrNone = false; for (Set<String> dims : dimsList) { if (dims.size() <= 1) { hasSingleOrNone = true; break; } } return hasSingleOrNone; } private Pair<Boolean, Set<String>> hasOverlap(ArrayList<Set<String>> dimsList, Set<String> Dims) { Set<String> existing = new HashSet<>(); Set<String> overlap = new HashSet<>(); for (Set<String> dims : dimsList) { if (CollectionUtils.containsAny(existing, dims)) { overlap.addAll(ensureOrder(CollectionUtils.intersection(existing, dims))); } existing.addAll(dims); } return new Pair<>(overlap.size() > 0, overlap); } private void initDimensionColumns() { for (DimensionDesc dim : dimensions) { JoinDesc join = dim.getJoin(); // init dimension columns ArrayList<TblColRef> dimCols = Lists.newArrayList(); String colStr = dim.getColumn(); if ((colStr == null && dim.isDerived()) || ("{FK}".equalsIgnoreCase(colStr))) { // when column is omitted, special case for (TblColRef col : join.getForeignKeyColumns()) { dimCols.add(initDimensionColRef(col)); } } else { // normal case checkState(!StringUtils.isEmpty(colStr), "Dimension column must not be blank: %s", dim); dimCols.add(initDimensionColRef(dim, colStr)); } TblColRef[] dimColArray = dimCols.toArray(new TblColRef[dimCols.size()]); dim.setColumnRefs(dimColArray); // init derived columns if (dim.isDerived()) { String[] derived = dim.getDerived(); String[][] split = splitDerivedColumnAndExtra(derived); String[] derivedNames = split[0]; String[] derivedExtra = split[1]; TblColRef[] derivedCols = new TblColRef[derivedNames.length]; for (int i = 0; i < derivedNames.length; i++) { derivedCols[i] = initDimensionColRef(dim, derivedNames[i]); } initDerivedMap(dimColArray, DeriveType.LOOKUP, join, derivedCols, derivedExtra); } if (join != null) { allColumns.addAll(Arrays.asList(join.getForeignKeyColumns())); allColumns.addAll(Arrays.asList(join.getPrimaryKeyColumns())); } } // PK-FK derive the other side Set<TblColRef> realDimensions = new HashSet<>(listDimensionColumnsExcludingDerived(true)); for (JoinTableDesc joinTable : model.getJoinTables()) { JoinDesc join = joinTable.getJoin(); int n = join.getForeignKeyColumns().length; for (int i = 0; i < n; i++) { TblColRef pk = join.getPrimaryKeyColumns()[i]; TblColRef fk = join.getForeignKeyColumns()[i]; if (realDimensions.contains(pk) && !realDimensions.contains(fk)) { initDimensionColRef(fk); initDerivedMap(new TblColRef[] { pk }, DeriveType.PK_FK, join, new TblColRef[] { fk }, null); } else if (realDimensions.contains(fk) && !realDimensions.contains(pk)) { initDimensionColRef(pk); initDerivedMap(new TblColRef[] { fk }, DeriveType.PK_FK, join, new TblColRef[] { pk }, null); } } } } private String[][] splitDerivedColumnAndExtra(String[] derived) { String[] cols = new String[derived.length]; String[] extra = new String[derived.length]; for (int i = 0; i < derived.length; i++) { String str = derived[i]; int cut = str.indexOf(":"); if (cut >= 0) { cols[i] = str.substring(0, cut); extra[i] = str.substring(cut + 1).trim(); } else { cols[i] = str; extra[i] = ""; } } return new String[][] { cols, extra }; } private void initDerivedMap(TblColRef[] hostCols, DeriveType type, JoinDesc join, TblColRef[] derivedCols, String[] extra) { if (hostCols.length == 0 || derivedCols.length == 0) throw new IllegalStateException("host/derived columns must not be empty"); // Although FK derives PK automatically, user unaware of this can declare PK as derived dimension explicitly. // In that case, derivedCols[] will contain a FK which is transformed from the PK by initDimensionColRef(). // Must drop FK from derivedCols[] before continue. for (int i = 0; i < derivedCols.length; i++) { if (ArrayUtils.contains(hostCols, derivedCols[i])) { derivedCols = (TblColRef[]) ArrayUtils.remove(derivedCols, i); if (extra != null) extra = (String[]) ArrayUtils.remove(extra, i); i--; } } if (derivedCols.length == 0) return; for (int i = 0; i < derivedCols.length; i++) { TblColRef derivedCol = derivedCols[i]; boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1")); derivedToHostMap.put(derivedCol, new DeriveInfo(type, join, hostCols, isOneToOne)); } Array<TblColRef> hostColArray = new Array<TblColRef>(hostCols); List<DeriveInfo> infoList = hostToDerivedMap.get(hostColArray); if (infoList == null) { infoList = new ArrayList<DeriveInfo>(); hostToDerivedMap.put(hostColArray, infoList); } // Merged duplicated derived column List<TblColRef> whatsLeft = new ArrayList<>(); for (TblColRef derCol : derivedCols) { boolean merged = false; for (DeriveInfo existing : infoList) { if (existing.type == type && existing.join.getPKSide().equals(join.getPKSide())) { if (ArrayUtils.contains(existing.columns, derCol)) { merged = true; break; } if (type == DeriveType.LOOKUP) { existing.columns = (TblColRef[]) ArrayUtils.add(existing.columns, derCol); merged = true; break; } } } if (!merged) whatsLeft.add(derCol); } if (whatsLeft.size() > 0) { infoList.add(new DeriveInfo(type, join, (TblColRef[]) whatsLeft.toArray(new TblColRef[whatsLeft.size()]), false)); } } private TblColRef initDimensionColRef(DimensionDesc dim, String colName) { TblColRef col = model.findColumn(dim.getTable(), colName); // for backward compatibility if (KylinVersion.isBefore200(getVersion())) { // always use FK instead PK, FK could be shared by more than one lookup tables JoinDesc join = dim.getJoin(); if (join != null) { int idx = ArrayUtils.indexOf(join.getPrimaryKeyColumns(), col); if (idx >= 0) { col = join.getForeignKeyColumns()[idx]; } } } return initDimensionColRef(col); } private TblColRef initDimensionColRef(TblColRef col) { allColumns.add(col); dimensionColumns.add(col); return col; } @SuppressWarnings("deprecation") private void initMeasureColumns() { if (measures == null || measures.isEmpty()) { return; } for (MeasureDesc m : measures) { m.setName(m.getName().toUpperCase()); if (m.getDependentMeasureRef() != null) { m.setDependentMeasureRef(m.getDependentMeasureRef().toUpperCase()); } FunctionDesc func = m.getFunction(); func.init(model); allColumns.addAll(func.getParameter().getColRefs()); if (ExtendedColumnMeasureType.FUNC_EXTENDED_COLUMN.equalsIgnoreCase(m.getFunction().getExpression())) { FunctionDesc functionDesc = m.getFunction(); List<TblColRef> hosts = ExtendedColumnMeasureType.getExtendedColumnHosts(functionDesc); TblColRef extendedColumn = ExtendedColumnMeasureType.getExtendedColumn(functionDesc); initExtendedColumnMap(hosts.toArray(new TblColRef[hosts.size()]), extendedColumn); } } } private void initExtendedColumnMap(TblColRef[] hostCols, TblColRef extendedColumn) { extendedColumnToHosts.put(extendedColumn, new DeriveInfo(DeriveType.EXTENDED_COLUMN, null, hostCols, false)); } private void initMeasureReferenceToColumnFamily() { if (measures == null || measures.size() == 0) return; Map<String, MeasureDesc> measureLookup = new HashMap<String, MeasureDesc>(); for (MeasureDesc m : measures) measureLookup.put(m.getName(), m); Map<String, Integer> measureIndexLookup = new HashMap<String, Integer>(); for (int i = 0; i < measures.size(); i++) measureIndexLookup.put(measures.get(i).getName(), i); BitSet checkEachMeasureExist = new BitSet(); for (HBaseColumnFamilyDesc cf : getHbaseMapping().getColumnFamily()) { for (HBaseColumnDesc c : cf.getColumns()) { String[] colMeasureRefs = c.getMeasureRefs(); MeasureDesc[] measureDescs = new MeasureDesc[colMeasureRefs.length]; int[] measureIndex = new int[colMeasureRefs.length]; for (int i = 0; i < colMeasureRefs.length; i++) { measureDescs[i] = measureLookup.get(colMeasureRefs[i]); checkState(measureDescs[i] != null, "measure desc at (%s) is null", i); measureIndex[i] = measureIndexLookup.get(colMeasureRefs[i]); checkState(measureIndex[i] >= 0, "measure index at (%s) not positive", i); checkEachMeasureExist.set(measureIndex[i]); } c.setMeasures(measureDescs); c.setMeasureIndex(measureIndex); c.setColumnFamilyName(cf.getName()); } } for (int i = 0; i < measures.size(); i++) { checkState(checkEachMeasureExist.get(i), "measure (%s) does not exist in column familyor measure duplicates", measures.get(i)); } } private void initDictionaryDesc() { if (dictionaries != null) { for (DictionaryDesc dictDesc : dictionaries) { dictDesc.init(this); allColumns.add(dictDesc.getColumnRef()); if (dictDesc.getResuseColumnRef() != null) { allColumns.add(dictDesc.getResuseColumnRef()); } } } } public TblColRef getColumnByBitIndex(int bitIndex) { RowKeyColDesc[] rowKeyColumns = this.getRowkey().getRowKeyColumns(); return rowKeyColumns[rowKeyColumns.length - 1 - bitIndex].getColRef(); } public boolean hasMemoryHungryMeasures() { for (MeasureDesc measure : measures) { if (measure.getFunction().getMeasureType().isMemoryHungry()) { return true; } } return false; } private void amendAllColumns() { // make sure all PF/FK are included, thus become exposed to calcite later Set<TableRef> tables = collectTablesOnJoinChain(allColumns); for (TableRef t : tables) { JoinDesc join = model.getJoinByPKSide(t); if (join != null) { allColumns.addAll(Arrays.asList(join.getForeignKeyColumns())); allColumns.addAll(Arrays.asList(join.getPrimaryKeyColumns())); } } for (TblColRef col : allColumns) { allColumnDescs.add(col.getColumnDesc()); } } private Set<TableRef> collectTablesOnJoinChain(Set<TblColRef> columns) { Set<TableRef> result = new HashSet<>(); for (TblColRef col : columns) { TableRef t = col.getTableRef(); while (t != null) { result.add(t); JoinDesc join = model.getJoinByPKSide(t); t = join == null ? null : join.getFKSide(); } } return result; } public long getRetentionRange() { return retentionRange; } public void setRetentionRange(long retentionRange) { this.retentionRange = retentionRange; } public long[] getAutoMergeTimeRanges() { return autoMergeTimeRanges; } public void setAutoMergeTimeRanges(long[] autoMergeTimeRanges) { this.autoMergeTimeRanges = autoMergeTimeRanges; } public void addError(String message) { this.errors.add(message); } public List<String> getError() { return this.errors; } public String getErrorMsg() { StringBuffer sb = new StringBuffer(); for (String error : errors) { sb.append(error + " "); } return sb.toString(); } public HBaseMappingDesc getHbaseMapping() { return hbaseMapping; } public void setHbaseMapping(HBaseMappingDesc hbaseMapping) { this.hbaseMapping = hbaseMapping; } public void setNullStrings(String[] nullStrings) { this.nullStrings = nullStrings; } public boolean supportsLimitPushDown() { return getStorageType() != IStorageAware.ID_HBASE && getStorageType() != IStorageAware.ID_HYBRID; } public int getStorageType() { return storageType; } public void setStorageType(int storageType) { this.storageType = storageType; } @Override public int getEngineType() { return engineType; } public void setEngineType(int engineType) { this.engineType = engineType; } public long getPartitionDateStart() { return partitionDateStart; } public void setPartitionDateStart(long partitionDateStart) { this.partitionDateStart = partitionDateStart; } public long getPartitionDateEnd() { return partitionDateEnd; } public void setPartitionDateEnd(long partitionDateEnd) { this.partitionDateEnd = partitionDateEnd; } public Map<Integer, Long> getPartitionOffsetStart() { return partitionOffsetStart; } public void setPartitionOffsetStart(Map<Integer, Long> partitionOffsetStart) { this.partitionOffsetStart = partitionOffsetStart; } public Set<Long> getAllCuboids() { buildCuboidTree(); return allCuboids; } public Map<Long, List<Long>> getParent2Child() { buildCuboidTree(); return parent2Child; } public int getParentForward() { return parentForward; } public void setParentForward(int parentForward) { this.parentForward = parentForward; } /** * Get columns that have dictionary */ public Set<TblColRef> getAllColumnsHaveDictionary() { Set<TblColRef> result = Sets.newLinkedHashSet(); // dictionaries in dimensions for (RowKeyColDesc rowKeyColDesc : rowkey.getRowKeyColumns()) { TblColRef colRef = rowKeyColDesc.getColRef(); if (rowkey.isUseDictionary(colRef)) { result.add(colRef); } } // dictionaries in measures for (MeasureDesc measure : measures) { MeasureType<?> aggrType = measure.getFunction().getMeasureType(); result.addAll(aggrType.getColumnsNeedDictionary(measure.getFunction())); } // any additional dictionaries if (dictionaries != null) { for (DictionaryDesc dictDesc : dictionaries) { TblColRef col = dictDesc.getColumnRef(); result.add(col); } } return result; } /** * Get columns that need dictionary built on it. Note a column could reuse dictionary of another column. */ public Set<TblColRef> getAllColumnsNeedDictionaryBuilt() { Set<TblColRef> result = getAllColumnsHaveDictionary(); // remove columns that reuse other's dictionary if (dictionaries != null) { for (DictionaryDesc dictDesc : dictionaries) { if (dictDesc.getResuseColumnRef() != null) { result.remove(dictDesc.getColumnRef()); result.add(dictDesc.getResuseColumnRef()); } } } return result; } /** * A column may reuse dictionary of another column, find the dict column, return same col if there's no reuse column */ public TblColRef getDictionaryReuseColumn(TblColRef col) { if (dictionaries == null) { return col; } for (DictionaryDesc dictDesc : dictionaries) { if (dictDesc.getColumnRef().equals(col) && dictDesc.getResuseColumnRef() != null) { return dictDesc.getResuseColumnRef(); } } return col; } /** * Get a column which can be used in distributing the source table */ public TblColRef getDistributedByColumn() { Set<TblColRef> shardBy = getShardByColumns(); if (shardBy != null && shardBy.size() > 0) { return shardBy.iterator().next(); } return null; } /** Get a column which can be used to cluster the source table. * To reduce memory footprint in base cuboid for global dict */ // TODO handle more than one ultra high cardinality columns use global dict in one cube TblColRef getClusteredByColumn() { if (getDistributedByColumn() != null) { return null; } if (dictionaries == null) { return null; } String clusterByColumn = config.getFlatHiveTableClusterByDictColumn(); for (DictionaryDesc dictDesc : dictionaries) { if (dictDesc.getColumnRef().getName().equalsIgnoreCase(clusterByColumn)) { return dictDesc.getColumnRef(); } } return null; } public String getDictionaryBuilderClass(TblColRef col) { if (dictionaries == null) return null; for (DictionaryDesc desc : dictionaries) { if (desc.getBuilderClass() != null) { // column that reuses other's dict need not be built, thus should not reach here if (col.equals(desc.getColumnRef())) { return desc.getBuilderClass(); } } } return null; } public static CubeDesc getCopyOf(CubeDesc cubeDesc) { CubeDesc newCubeDesc = new CubeDesc(); newCubeDesc.setName(cubeDesc.getName()); newCubeDesc.setDraft(cubeDesc.isDraft()); newCubeDesc.setModelName(cubeDesc.getModelName()); newCubeDesc.setDescription(cubeDesc.getDescription()); newCubeDesc.setNullStrings(cubeDesc.getNullStrings()); newCubeDesc.setDimensions(cubeDesc.getDimensions()); newCubeDesc.setMeasures(cubeDesc.getMeasures()); newCubeDesc.setDictionaries(cubeDesc.getDictionaries()); newCubeDesc.setRowkey(cubeDesc.getRowkey()); newCubeDesc.setHbaseMapping(cubeDesc.getHbaseMapping()); newCubeDesc.setSignature(cubeDesc.getSignature()); newCubeDesc.setNotifyList(cubeDesc.getNotifyList()); newCubeDesc.setStatusNeedNotify(cubeDesc.getStatusNeedNotify()); newCubeDesc.setAutoMergeTimeRanges(cubeDesc.getAutoMergeTimeRanges()); newCubeDesc.setPartitionDateStart(cubeDesc.getPartitionDateStart()); newCubeDesc.setPartitionDateEnd(cubeDesc.getPartitionDateEnd()); newCubeDesc.setRetentionRange(cubeDesc.getRetentionRange()); newCubeDesc.setEngineType(cubeDesc.getEngineType()); newCubeDesc.setStorageType(cubeDesc.getStorageType()); newCubeDesc.setAggregationGroups(cubeDesc.getAggregationGroups()); newCubeDesc.setOverrideKylinProps(cubeDesc.getOverrideKylinProps()); newCubeDesc.setConfig((KylinConfigExt) cubeDesc.getConfig()); newCubeDesc.setPartitionOffsetStart(cubeDesc.getPartitionOffsetStart()); newCubeDesc.setVersion(cubeDesc.getVersion()); newCubeDesc.setParentForward(cubeDesc.getParentForward()); newCubeDesc.updateRandomUuid(); return newCubeDesc; } private Collection ensureOrder(Collection c) { TreeSet set = new TreeSet(); for (Object o : c) set.add(o.toString()); //System.out.println("set:"+set); return set; } }