Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.lens.cube.parse; import static org.apache.lens.cube.metadata.DateFactory.NOW; import static org.apache.lens.cube.metadata.DateFactory.TWO_DAYS_RANGE; import static org.apache.lens.cube.metadata.DateFactory.TWO_MONTHS_RANGE_UPTO_DAYS; import static org.apache.lens.cube.metadata.DateFactory.getDateWithOffset; import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.SEGMENTATION_PRUNED; import static org.apache.lens.cube.parse.CubeQueryConfUtil.DISABLE_AGGREGATE_RESOLVER; import static org.apache.lens.cube.parse.CubeQueryConfUtil.DISABLE_AUTO_JOINS; import static org.apache.lens.cube.parse.CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES; import static org.apache.lens.cube.parse.CubeQueryConfUtil.ENABLE_FLATTENING_FOR_BRIDGETABLES; import static org.apache.lens.cube.parse.CubeQueryConfUtil.ENABLE_GROUP_BY_TO_SELECT; import static org.apache.lens.cube.parse.CubeQueryConfUtil.ENABLE_SELECT_TO_GROUPBY; import static org.apache.lens.cube.parse.CubeQueryConfUtil.RESOLVE_SEGMENTATIONS; import static org.apache.lens.cube.parse.CubeTestSetup.getDbName; import static org.apache.lens.cube.parse.CubeTestSetup.getExpectedQuery; import static org.apache.lens.cube.parse.CubeTestSetup.getWhereForDailyAndHourly2days; import static org.apache.lens.cube.parse.CubeTestSetup.getWhereForHourly2days; import static org.apache.lens.cube.parse.CubeTestSetup.getWhereForUpdatePeriods; import static org.apache.lens.cube.parse.TestCubeRewriter.compareQueries; import static org.apache.commons.lang3.time.DateUtils.addDays; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import static com.google.common.collect.Lists.newArrayList; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.lens.cube.error.NoCandidateFactAvailableException; import org.apache.lens.cube.metadata.UpdatePeriod; import org.apache.lens.server.api.LensServerAPITestUtil; import org.apache.lens.server.api.error.LensException; import org.apache.hadoop.conf.Configuration; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; import com.google.common.collect.Sets; import junit.framework.Assert; import lombok.extern.slf4j.Slf4j; @Slf4j public class TestCubeSegmentationRewriter extends TestQueryRewrite { private Configuration conf; @BeforeTest public void setupDriver() throws Exception { conf = LensServerAPITestUtil.getConfiguration(DRIVER_SUPPORTED_STORAGES, "C0,C1", DISABLE_AUTO_JOINS, false, ENABLE_SELECT_TO_GROUPBY, true, ENABLE_GROUP_BY_TO_SELECT, true, RESOLVE_SEGMENTATIONS, true, DISABLE_AGGREGATE_RESOLVER, false, ENABLE_FLATTENING_FOR_BRIDGETABLES, true); } @Override public Configuration getConf() { return new Configuration(conf); } private static String extractTableName(String query) { String l = query.toLowerCase(); int fromIndex = l.indexOf("from"); int toIndex = l.indexOf(" ", fromIndex + 5); return l.substring(fromIndex + 5, toIndex); } private static void compareUnionQuery(CubeQueryContext cubeql, String begin, String end, List<String> queries) throws LensException { final String actualLower = cubeql.toHQL().toLowerCase(); queries.sort(Comparator.comparing(s -> actualLower.indexOf(extractTableName(s)))); String expected = queries.stream().collect(Collectors.joining(" UNION ALL ", begin, end)); compareQueries(actualLower, expected); } @Test public void testSegmentRewrite() throws Exception { CubeQueryContext ctx = rewriteCtx("select cityid, segmsr1 from testcube where " + TWO_DAYS_RANGE, getConf()); String query1 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.segmsr1) as alias1 FROM ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days("testcube", "c1_b1fact1")); String query2 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.segmsr1) as alias1 FROM ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days("testcube", "c0_b2fact1")); compareUnionQuery(ctx, "SELECT (testcube.alias0) as `cityid`, sum((testcube.alias1)) as `segmsr1` FROM (", " ) as testcube GROUP BY (testcube.alias0)", newArrayList(query1, query2)); } /* Asking for segmsr1 from testcube. segmsr1 is available in b1b2fact and seg1 split over time. Inside seg1, Two segments are there: b1cube and b2cube. b1cube has one fact b1fact which is split over time across two storages: c1 and c2. b2cube has one fact which answers complete range given to it. So total 4 storage candidates should be there. */ @Test public void testFactUnionSegmentWithInnerUnion() throws Exception { CubeQueryContext ctx = rewriteCtx( "select cityid, segmsr1 from testcube where " + TWO_MONTHS_RANGE_UPTO_DAYS, getConf()); String query1, query2, query3, query4; query1 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.segmsr1) as alias1 from ", null, "group by testcube.cityid", getWhereForUpdatePeriods("testcube", "c0_b1fact1", addDays(getDateWithOffset(UpdatePeriod.MONTHLY, -1), -1), getDateWithOffset(UpdatePeriod.DAILY, -10), Sets.newHashSet(UpdatePeriod.MONTHLY, UpdatePeriod.DAILY))); query2 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.segmsr1) as alias1 from ", null, "group by testcube.cityid", getWhereForUpdatePeriods("testcube", "c1_b1fact1", getDateWithOffset(UpdatePeriod.DAILY, -11), NOW, Sets.newHashSet(UpdatePeriod.MONTHLY, UpdatePeriod.DAILY))); query3 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.segmsr1) as alias1 from ", null, "group by testcube.cityid", getWhereForUpdatePeriods("testcube", "c0_b2fact1", addDays(getDateWithOffset(UpdatePeriod.MONTHLY, -1), -1), NOW, Sets.newHashSet(UpdatePeriod.MONTHLY, UpdatePeriod.DAILY))); query4 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.segmsr1) as alias1 from ", null, "group by testcube.cityid", getWhereForUpdatePeriods("testcube", "c0_b1b2fact1", addDays(getDateWithOffset(UpdatePeriod.MONTHLY, -2), -1), addDays(getDateWithOffset(UpdatePeriod.MONTHLY, -1), 0), Sets.newHashSet(UpdatePeriod.MONTHLY, UpdatePeriod.DAILY))); compareUnionQuery(ctx, "select testcube.alias0 as cityid, sum(testcube.alias1) as segmsr1 from (", ") AS testcube GROUP BY (testcube.alias0)", newArrayList(query1, query2, query3, query4)); } @Test public void testFactJoinSegmentWithInnerUnion() throws Exception { CubeQueryContext ctx = rewriteCtx("select cityid, msr2, segmsr1 from testcube where " + TWO_DAYS_RANGE, getConf()); String query1, query2, query3; query1 = getExpectedQuery("testcube", "select testcube.cityid as alias0, 0.0 as alias1, sum(testcube.segmsr1) as alias2 FROM ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days("testcube", "c1_b1fact1")); query2 = getExpectedQuery("testcube", "select testcube.cityid as alias0, 0.0 as alias1, sum(testcube.segmsr1) as alias2 FROM ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days("testcube", "c0_b2fact1")); query3 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.msr2) as alias1, 0.0 as alias2 FROM ", null, "group by testcube.cityid", getWhereForHourly2days("testcube", "c1_testfact2")); compareUnionQuery(ctx, "select testcube.alias0 as cityid, sum(testcube.alias1) as msr2, sum(testcube.alias2) as segmsr1 from ( ", ") as testcube group by testcube.alias0", newArrayList(query1, query2, query3)); } @Test public void testFieldWithDifferentDescriptions() throws LensException { NoCandidateFactAvailableException e = getLensExceptionInRewrite( "select invmsr1 from testcube where " + TWO_DAYS_RANGE, getConf()); assertEquals(e.getJsonMessage().getBrief(), "Columns [invmsr1] are not present in any table"); } @Test public void testExpressions() throws Exception { CubeQueryContext ctx = rewriteCtx( "select singlecolchainfield, segmsr1 from testcube where " + TWO_DAYS_RANGE, getConf()); String joinExpr = " JOIN " + getDbName() + "c1_citytable cubecity ON testcube.cityid = cubecity.id AND (cubecity.dt = 'latest')"; String query1, query2; query1 = getExpectedQuery("testcube", "SELECT (cubecity.name) AS `alias0`, sum((testcube.segmsr1)) AS `alias1` from", joinExpr, null, "group by cubecity.name", null, getWhereForDailyAndHourly2days("testcube", "c1_b1fact1")); query2 = getExpectedQuery("testcube", "SELECT (cubecity.name) AS `alias0`, sum((testcube.segmsr1)) AS `alias1` from", joinExpr, null, "group by cubecity.name", null, getWhereForDailyAndHourly2days("testcube", "c0_b2fact1")); compareUnionQuery(ctx, "SELECT (testcube.alias0) AS `singlecolchainfield`, sum((testcube.alias1)) AS `segmsr1` from (", "as testcube group by testcube.alias0", newArrayList(query1, query2)); } @Test public void testQueryWithWhereHavingGroupbyOrderby() throws Exception { String userQuery = "select cityid, msr2, segmsr1 from testcube where cityname='blah' and " + TWO_DAYS_RANGE + " group by cityid having segmsr1 > 1 and msr2 > 2"; CubeQueryContext ctx = rewriteCtx(userQuery, getConf()); String join1, join2, join3; String query1, query2, query3; join1 = "join " + getDbName() + "c1_citytable cubecity1 ON testcube.cityid1 = cubecity1.id AND (cubecity1.dt = 'latest')"; join2 = "join " + getDbName() + "c1_citytable cubecity2 ON testcube.cityid2 = cubecity2.id AND (cubecity2.dt = 'latest')"; join3 = "join " + getDbName() + "c1_citytable cubecity ON testcube.cityid = cubecity.id AND (cubecity.dt = 'latest')"; query1 = getExpectedQuery("testcube", "select testcube.cityid as alias0, 0.0 as alias1, sum(testcube.segmsr1) as alias2 FROM ", join1, "cubecity1.name='blah'", "group by testcube.cityid", null, getWhereForDailyAndHourly2days("testcube", "c1_b1fact1")); query2 = getExpectedQuery("testcube", "select testcube.cityid as alias0, 0.0 as alias1, sum(testcube.segmsr1) as alias2 FROM ", join2, "cubecity2.name='blah'", "group by testcube.cityid", null, getWhereForDailyAndHourly2days("testcube", "c0_b2fact1")); query3 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.msr2) as alias1, 0.0 as alias2 FROM ", join3, "cubecity.name='blah'", "group by testcube.cityid", null, getWhereForHourly2days("testcube", "c1_testfact2")); compareUnionQuery(ctx, "select testcube.alias0 as cityid, sum(testcube.alias1) as msr2, sum(testcube.alias2) as segmsr1 from ( ", ") as testcube group by testcube.alias0 having ((sum((testcube.alias2)) > 1) and (sum((testcube.alias1)) > 2)", newArrayList(query1, query2, query3)); // Expression in having userQuery = "select cityid, segmsr1 from testcube where cityname='blah' and " + TWO_DAYS_RANGE + " having citysegmsr1 > 20"; String rewrittenQuery = rewrite(userQuery, getConf()); assertTrue(rewrittenQuery.toLowerCase() .endsWith("sum(case when ((cubecity.name) = 'foo') " + "then (testcube.segmsr1) end) > 20)")); // Order by on alias userQuery = "select cityid as `city_id_alias`, segmsr1 from testcube where cityname='blah' and " + TWO_DAYS_RANGE + " order by city_id_alias"; rewrittenQuery = rewrite(userQuery, getConf()); assertTrue(rewrittenQuery.toLowerCase().endsWith("order by city_id_alias asc")); // Order by on column but the final query rewritten with alias userQuery = "select cityid as `city_id_alias`, segmsr1 from testcube where cityname='blah' and " + TWO_DAYS_RANGE + " order by cityid"; rewrittenQuery = rewrite(userQuery, getConf()); assertTrue(rewrittenQuery.toLowerCase().endsWith("order by city_id_alias asc")); } @Test public void testQueryWithManyToMany() throws LensException { String userQuery = "select usersports.name, xusersports.name, yusersports.name, segmsr1, msr2 from testcube where " + TWO_DAYS_RANGE; CubeQueryContext ctx = rewriteCtx(userQuery, getConf()); String query1, query2, query3; String joinExpr = " join " + getDbName() + "c1_usertable userdim_1 on testcube.userid = userdim_1.id " + " join (select user_interests_1.user_id as user_id, collect_set(usersports.name) as balias0 from " + getDbName() + "c1_user_interests_tbl user_interests_1 join " + getDbName() + "c1_sports_tbl usersports on " + "user_interests_1.sport_id = usersports.id group by user_interests_1.user_id) " + "usersports on userdim_1.id = usersports.user_id" + " join " + getDbName() + "c1_usertable userdim_0 on testcube.yuserid = userdim_0.id " + " join (select user_interests_0.user_id as user_id,collect_set(yusersports.name) as balias0 from " + getDbName() + "c1_user_interests_tbl user_interests_0 join " + getDbName() + "c1_sports_tbl yusersports on " + " user_interests_0.sport_id = yusersports.id group by user_interests_0.user_id) yusersports on userdim_0.id =" + " yusersports.user_id join " + getDbName() + "c1_usertable userdim on testcube.xuserid = userdim.id" + " join (select user_interests.user_id as user_id,collect_set(xusersports.name) as balias0 from " + getDbName() + "c1_user_interests_tbl user_interests join " + getDbName() + "c1_sports_tbl xusersports" + " on user_interests.sport_id = xusersports.id group by user_interests.user_id) xusersports on userdim.id = " + " xusersports.user_id"; query1 = getExpectedQuery("testcube", "select (usersports.balias0) AS `alias0`, (xusersports.balias0) AS `alias1`, (yusersports.balias0) AS `alias2`, " + "sum((testcube.segmsr1)) AS `alias3`, 0.0 AS `alias4` FROM ", joinExpr, null, "group by (usersports.balias0), (xusersports.balias0), (yusersports.balias0), ", null, getWhereForDailyAndHourly2days("testcube", "c1_b1fact1")); query2 = getExpectedQuery("testcube", "select (usersports.balias0) AS `alias0`, (xusersports.balias0) AS `alias1`, (yusersports.balias0) AS `alias2`, " + "sum((testcube.segmsr1)) AS `alias3`, 0.0 AS `alias4` FROM ", joinExpr, null, "group by (usersports.balias0), (xusersports.balias0), (yusersports.balias0)", null, getWhereForDailyAndHourly2days("testcube", "c0_b2fact1")); query3 = getExpectedQuery("testcube", "select (usersports.balias0) AS `alias0`, (xusersports.balias0) AS `alias1`, (yusersports.balias0) AS `alias2`, " + "0.0 AS `alias3`, sum(testcube.msr2) AS `alias4` FROM ", joinExpr, null, "group by (usersports.balias0), (xusersports.balias0), (yusersports.balias0)", null, getWhereForHourly2days("testcube", "c1_testfact2")); compareUnionQuery(ctx, "select testcube.alias0 AS `name`,testcube.alias1 AS `name`, testcube.alias2 AS `name`, " + "sum((testcube.alias3)) AS `segmsr1`, sum((testcube.alias4)) AS `msr2` from ( ", ") as testcube group by testcube.alias0, testcube.alias1, testcube.alias2", newArrayList(query1, query2, query3)); } @Test public void testQueryWithHavingOnInnerMeasure() throws LensException { String userQuery = "select cityid from testcube where " + TWO_DAYS_RANGE + " having msr2 > 2 and segmsr1 > 1"; CubeQueryContext ctx = rewriteCtx(userQuery, getConf()); String query1, query2, query3; query1 = getExpectedQuery("testcube", "select testcube.cityid as alias0, 0.0 as alias1, sum(testcube.segmsr1) as alias2 FROM ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days("testcube", "c1_b1fact1")); query2 = getExpectedQuery("testcube", "select testcube.cityid as alias0, 0.0 as alias1, sum(testcube.segmsr1) as alias2 FROM ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days("testcube", "c0_b2fact1")); query3 = getExpectedQuery("testcube", "select testcube.cityid as alias0, sum(testcube.msr2) as alias1, 0.0 as alias2 FROM ", null, "group by testcube.cityid", getWhereForHourly2days("testcube", "c1_testfact2")); compareUnionQuery(ctx, "select testcube.alias0 as cityid from ( ", ") as testcube group by testcube.alias0 having sum(testcube.alias1) > 2 and sum(testcube.alias2) > 1", newArrayList(query1, query2, query3)); } @Test public void testSegmentationWithSingleSegment() throws LensException { String userQuery = "select segmsr1 from basecube where " + TWO_DAYS_RANGE; String actual = rewrite(userQuery, getConf()); String expected = getExpectedQuery("basecube", "select sum(basecube.segmsr1) FROM ", null, null, getWhereForDailyAndHourly2days("basecube", "c1_b1fact1")); compareQueries(actual, expected); } @Test public void testSegmentationPruningWithPruneCause() throws LensException { String userQuery = "select segsegmsr1 from testcube where " + TWO_DAYS_RANGE; PruneCauses<Candidate> pruneCauses = getBriefAndDetailedError(userQuery, getConf()); Assert.assertEquals(pruneCauses.getMaxCause(), SEGMENTATION_PRUNED); Map<String, String> innerCauses = pruneCauses.getCompact().get("SEG[b1cube; b2cube]").iterator().next() .getInnerCauses(); Assert.assertEquals(innerCauses.size(), 2); Assert.assertTrue(innerCauses.get("b1cube").equals("Columns [segsegmsr1] are not present in any table")); Assert.assertTrue(innerCauses.get("b2cube").equals("Columns [segsegmsr1] are not present in any table")); } }