Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.lens.cube.parse; import static java.util.stream.Collectors.joining; import static java.util.stream.Collectors.toMap; import static org.apache.lens.cube.metadata.DateUtil.formatAbsDate; import static org.apache.lens.cube.metadata.MetastoreUtil.getStringLiteralAST; import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_FROM; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_HAVING; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_INSERT; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_ORDERBY; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_SELEXPR; import java.util.Collection; import java.util.Comparator; import java.util.Date; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.OptionalDouble; import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collector; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.lens.cube.metadata.Cube; import org.apache.lens.cube.metadata.CubeColumn; import org.apache.lens.cube.metadata.CubeInterface; import org.apache.lens.cube.metadata.FactPartition; import org.apache.lens.cube.metadata.MetastoreUtil; import org.apache.lens.cube.metadata.Segment; import org.apache.lens.cube.metadata.Segmentation; import org.apache.lens.cube.metadata.TimeRange; import org.apache.lens.server.api.error.LensException; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.antlr.runtime.CommonToken; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import lombok.Getter; /** * Created on 09/03/17. */ public class SegmentationCandidate implements Candidate { Collection<String> columns; @Getter private final CubeQueryContext cubeQueryContext; private Segmentation segmentation; private Map<String, Cube> cubesOfSegmentation; Map<String, CubeQueryContext> cubeQueryContextMap; @Getter private final Set<Integer> answerableMeasurePhraseIndices = Sets.newHashSet(); private Map<TimeRange, TimeRange> queriedRangeToMyRange = Maps.newHashMap(); SegmentationCandidate(CubeQueryContext cubeQueryContext, Segmentation segmentation) throws LensException { this.cubeQueryContext = cubeQueryContext; this.segmentation = segmentation; cubesOfSegmentation = Maps.newHashMap(); cubeQueryContextMap = Maps.newHashMap(); for (Segment segment : segmentation.getSegments()) { // assuming only base cubes in segmentation cubesOfSegmentation.put(segment.getName(), (Cube) getCubeMetastoreClient().getCube(segment.getName())); } } public SegmentationCandidate explode() throws LensException { return this; } private static <T> Predicate<T> not(Predicate<T> predicate) { return predicate.negate(); } boolean rewriteInternal(Configuration conf, HiveConf hconf) throws LensException { CubeInterface cube = getCube(); if (cube == null) { return false; } for (Segment segment : segmentation.getSegments()) { // assuming only base cubes in segmentation Cube innerCube = (Cube) getCubeMetastoreClient().getCube(segment.getName()); cubesOfSegmentation.put(segment.getName(), innerCube); Set<QueriedPhraseContext> notAnswerable = cubeQueryContext.getQueriedPhrases().stream() .filter(not(this::isPhraseAnswerable)).collect(Collectors.toSet()); // create ast ASTNode ast = MetastoreUtil.copyAST(cubeQueryContext.getAst(), astNode -> { // replace time range for (Map.Entry<TimeRange, TimeRange> timeRangeTimeRangeEntry : queriedRangeToMyRange.entrySet()) { TimeRange queriedTimeRange = timeRangeTimeRangeEntry.getKey(); TimeRange timeRange = timeRangeTimeRangeEntry.getValue(); if (astNode.getParent() == queriedTimeRange.getAstNode()) { if (astNode.getChildIndex() == 2) { return Pair.of(getStringLiteralAST(formatAbsDate(timeRange.getFromDate())), false); } else if (astNode.getChildIndex() == 3) { return Pair.of(getStringLiteralAST(formatAbsDate(timeRange.getToDate())), false); } break; } } // else, replace unanswerable measures for (QueriedPhraseContext phraseContext : notAnswerable) { if ((astNode.getType() != TOK_SELEXPR && astNode == phraseContext.getExprAST()) || astNode.getParent() == phraseContext.getExprAST()) { return Pair.of(MetastoreUtil.copyAST(UnionQueryWriter.DEFAULT_MEASURE_AST), false); } } // else, copy token replacing cube name and ask for recursion on child nodes // this is hard copy. Default is soft copy, which is new ASTNode(astNode) // Soft copy retains the token object inside it, hard copy copies token object return Pair.of(new ASTNode(new CommonToken(astNode.getToken())), true); }); addCubeNameAndAlias(ast, innerCube); trimHavingAndOrderby(ast, innerCube); CubeQueryRewriter rewriter = new CubeQueryRewriter(conf, hconf); CubeQueryContext ctx = rewriter.rewrite(ast); cubeQueryContextMap.put(segment.getName(), ctx); if (!ctx.getCandidates().isEmpty()) { ctx.pickCandidateToQuery(); for (StorageCandidate storageCandidate : CandidateUtil .getStorageCandidates(ctx.getPickedCandidate())) { for (Map.Entry<TimeRange, TimeRange> timeRangeTimeRangeEntry : queriedRangeToMyRange .entrySet()) { TimeRange timeRange = timeRangeTimeRangeEntry.getKey(); TimeRange queriedTimeRange = timeRangeTimeRangeEntry.getValue(); Set<FactPartition> rangeToPartition = storageCandidate.getRangeToPartitions() .get(timeRange); if (rangeToPartition != null) { storageCandidate.getRangeToPartitions().put(queriedTimeRange, rangeToPartition); } String extraWhere = storageCandidate.getRangeToExtraWhereFallBack().get(timeRange); if (extraWhere != null) { storageCandidate.getRangeToExtraWhereFallBack().put(queriedTimeRange, extraWhere); } } } } } return areCandidatesPicked(); } private void addCubeNameAndAlias(ASTNode ast, Cube innerCube) { ASTNode cubeNameNode = findCubeNameNode(HQLParser.findNodeByPath(ast, TOK_FROM)); assert cubeNameNode != null; ASTNode tabrefNode = (ASTNode) cubeNameNode.getParent().getParent(); cubeNameNode.getToken().setText(innerCube.getName()); ASTNode aliasNode = new ASTNode( new CommonToken(Identifier, getCubeQueryContext().getAliasForTableName(getCube().getName()))); if (tabrefNode.getChildCount() > 1) { tabrefNode.setChild(1, aliasNode); } else { tabrefNode.addChild(aliasNode); } } private ASTNode findCubeNameNode(ASTNode node) { if (node.getType() == Identifier) { if (node.getText().equalsIgnoreCase(getCubeQueryContext().getCube().getName())) { return node; } else { return null; // should never come here. } } return node.getChildren().stream().map(ASTNode.class::cast).map(this::findCubeNameNode) .filter(Objects::nonNull).findFirst().orElse(null); } private void trimHavingAndOrderby(ASTNode ast, Cube innerCube) { ASTNode havingAst = HQLParser.findNodeByPath(ast, TOK_INSERT, TOK_HAVING); if (havingAst != null) { ASTNode newHavingAst = HQLParser.trimHavingAst(havingAst, innerCube.getAllFieldNames()); if (newHavingAst != null) { havingAst.getParent().setChild(havingAst.getChildIndex(), newHavingAst); } else { havingAst.getParent().deleteChild(havingAst.getChildIndex()); } } ASTNode orderByAst = HQLParser.findNodeByPath(ast, TOK_INSERT, TOK_ORDERBY); if (orderByAst != null) { ASTNode newOrderByAst = HQLParser.trimOrderByAst(orderByAst, innerCube.getAllFieldNames()); if (newOrderByAst != null) { orderByAst.getParent().setChild(orderByAst.getChildIndex(), newOrderByAst); } else { orderByAst.getParent().deleteChild(orderByAst.getChildIndex()); } } } public SegmentationCandidate(SegmentationCandidate segmentationCandidate) throws LensException { this(segmentationCandidate.cubeQueryContext, segmentationCandidate.segmentation); } @Override public Collection<String> getColumns() { if (columns == null) { columns = cubeStream().map(Cube::getAllFieldNames).reduce(Sets::intersection) .orElseGet(Sets::newHashSet).stream().collect(Collectors.toSet()); } return columns; } @Override public Date getStartTime() { return segmentation.getStartTime(); } @Override public Date getEndTime() { return segmentation.getEndTime(); } @Override public OptionalDouble getCost() { if (areCandidatesPicked()) { double cost = 0.0; for (Candidate candidate : getChildren()) { if (candidate.getCost().isPresent()) { cost += candidate.getCost().getAsDouble(); } else { return OptionalDouble.empty(); } } return OptionalDouble.of(cost); } else { return OptionalDouble.empty(); } } @Override public boolean contains(Candidate candidate) { return areCandidatesPicked() && getChildren().contains(candidate); } @Override public Collection<Candidate> getChildren() { return candidateStream().collect(Collectors.toSet()); } @Override public boolean isTimeRangeCoverable(TimeRange timeRange) throws LensException { return true; } @Override public boolean evaluateCompleteness(TimeRange timeRange, TimeRange queriedTimeRange, boolean failOnPartialData) throws LensException { queriedRangeToMyRange.put(queriedTimeRange, timeRange); return true; } @Override public Set<FactPartition> getParticipatingPartitions() { Set<FactPartition> partitionSet = Sets.newHashSet(); for (CubeQueryContext cubeQueryContext : cubeQueryContextMap.values()) { if (cubeQueryContext.getPickedCandidate() != null) { partitionSet.addAll(cubeQueryContext.getPickedCandidate().getParticipatingPartitions()); } } return partitionSet; } @Override public boolean isExpressionEvaluable(ExpressionResolver.ExpressionContext expr) { // expression context is specific to cubequerycontext. So for segmentation candidate, // I can't ask my children to check this context for evaluability. return cubeStream().map(cube -> cube.getExpressionByName(expr.getExprCol().getName())) .allMatch(Objects::nonNull); } private boolean areCandidatesPicked() { return candidateStream().count() == cubesOfSegmentation.size(); } private Stream<Candidate> candidateStream() { return contextStream().map(CubeQueryContext::getPickedCandidate).filter(Objects::nonNull); } private Stream<CubeQueryContext> contextStream() { return cubeQueryContextMap.values().stream(); } private Stream<Cube> cubeStream() { return cubesOfSegmentation.values().stream(); } @Override public boolean isExpressionEvaluable(String expr) { return candidateStream().allMatch(cand -> cand.isExpressionEvaluable(expr)); } @Override public boolean isDimAttributeEvaluable(String dim) throws LensException { if (areCandidatesPicked()) { for (Candidate childCandidate : (Iterable<Candidate>) candidateStream()::iterator) { if (!childCandidate.isDimAttributeEvaluable(dim)) { return false; } } return true; } return hasColumn(dim); } @Override public Candidate copy() throws LensException { return new SegmentationCandidate(this); } @Override public boolean isPhraseAnswerable(QueriedPhraseContext phrase) { // TODO consider measure start time etc return getColumns().containsAll(phrase.getColumns()); } @Override public Optional<Date> getColumnStartTime(String column) { if (areCandidatesPicked()) { return candidateStream().map(c -> c.getColumnStartTime(column)).filter(Optional::isPresent) .map(Optional::get).min(Comparator.naturalOrder()); } else { return cubeStream().map(cube -> cube.getColumnByName(column)).map(CubeColumn::getStartTime) .filter(Objects::nonNull).min(Comparator.naturalOrder()); } } @Override public Optional<Date> getColumnEndTime(String column) { if (areCandidatesPicked()) { return candidateStream().map(c -> c.getColumnEndTime(column)).filter(Optional::isPresent) // use flatmap(Optional::stream) after migration to java9 .map(Optional::get) // https://bugs.openjdk.java.net/browse/JDK-8050820 .max(Comparator.naturalOrder()); } else { return cubeStream().map(cube -> cube.getColumnByName(column)).map(CubeColumn::getEndTime) .filter(Objects::nonNull).max(Comparator.naturalOrder()); } } public void addAnswerableMeasurePhraseIndices(int index) { answerableMeasurePhraseIndices.add(index); } public String toString() { Collector<CharSequence, ?, String> collector = joining("; ", "SEG[", "]"); if (areCandidatesPicked()) { return candidateStream().map(Candidate::toString).collect(collector); } else { return cubeStream().map(Cube::getName).collect(collector); } } Map<String, PruneCauses<Candidate>> getPruneCausesOfFailedContexts() { return cubeQueryContextMap.entrySet().stream() .filter(entry -> entry.getValue().getPickedCandidate() == null) .collect(toMap(Map.Entry::getKey, entry -> entry.getValue().getStoragePruningMsgs())); } }