Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.kylin.cube.inmemcubing; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.commons.io.FileUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeJoinedFlatTableEnrich; import org.apache.kylin.dict.DictionaryGenerator; import org.apache.kylin.dict.IterableDictionaryValueEnumerator; import org.apache.kylin.engine.EngineFactory; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.IJoinedFlatTableDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.apache.kylin.metadata.model.TblColRef; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Maps; /** */ public class ITInMemCubeBuilderTest extends LocalFileMetadataTestCase { private static final Logger logger = LoggerFactory.getLogger(ITInMemCubeBuilderTest.class); private CubeInstance cube; private String flatTable; private Map<TblColRef, Dictionary<String>> dictionaryMap; private int nInpRows; private int nThreads; @Before public void before() throws IOException { createTestMetadata(); } @After public void after() throws Exception { cleanupTestMetadata(); } @Test public void testSSBCubeMore() throws Exception { testBuild("ssb", // LOCALMETA_TEST_DATA + "/data/kylin_intermediate_ssb_19920101000000_19920201000000.csv", 7000, 4); } @Test public void testSSBCube() throws Exception { testBuild("ssb", // LOCALMETA_TEST_DATA + "/data/kylin_intermediate_ssb_19920101000000_19920201000000.csv", 1000, 1); } public void testBuild(String cubeName, String flatTable, int nInpRows, int nThreads) throws Exception { KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv(); CubeManager cubeManager = CubeManager.getInstance(kylinConfig); this.nInpRows = nInpRows; this.nThreads = nThreads; this.cube = cubeManager.getCube(cubeName); this.flatTable = flatTable; this.dictionaryMap = getDictionaryMap(cube, flatTable); testBuildInner(); } private void testBuildInner() throws Exception { IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(cube.getDescriptor()); InMemCubeBuilder cubeBuilder = new InMemCubeBuilder(cube.getDescriptor(), flatDesc, dictionaryMap); //DoggedCubeBuilder cubeBuilder = new DoggedCubeBuilder(cube.getDescriptor(), dictionaryMap); cubeBuilder.setConcurrentThreads(nThreads); ArrayBlockingQueue<List<String>> queue = new ArrayBlockingQueue<List<String>>(1000); ExecutorService executorService = Executors.newSingleThreadExecutor(); try { // round 1 { Future<?> future = executorService .submit(cubeBuilder.buildAsRunnable(queue, new ConsoleGTRecordWriter())); feedData(cube, flatTable, queue, nInpRows); future.get(); } // round 2, zero input { Future<?> future = executorService .submit(cubeBuilder.buildAsRunnable(queue, new ConsoleGTRecordWriter())); feedData(cube, flatTable, queue, 0); future.get(); } // round 3 { Future<?> future = executorService .submit(cubeBuilder.buildAsRunnable(queue, new ConsoleGTRecordWriter())); feedData(cube, flatTable, queue, nInpRows); future.get(); } } catch (Exception e) { logger.error("stream build failed", e); throw new IOException("Failed to build cube ", e); } } static void feedData(final CubeInstance cube, final String flatTable, ArrayBlockingQueue<List<String>> queue, int count) throws IOException, InterruptedException { feedData(cube, flatTable, queue, count, 0); } static void feedData(final CubeInstance cube, final String flatTable, ArrayBlockingQueue<List<String>> queue, int count, long randSeed) throws IOException, InterruptedException { IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(cube.getDescriptor()); int nColumns = flatDesc.getAllColumns().size(); @SuppressWarnings("unchecked") Set<String>[] distinctSets = new Set[nColumns]; for (int i = 0; i < nColumns; i++) distinctSets[i] = new TreeSet<String>(); // get distinct values on each column List<String> lines = FileUtils.readLines(new File(flatTable), "UTF-8"); for (String line : lines) { String[] row = line.trim().split(","); assert row.length == nColumns; for (int i = 0; i < nColumns; i++) distinctSets[i].add(row[i]); } List<String[]> distincts = new ArrayList<String[]>(); for (int i = 0; i < nColumns; i++) { distincts.add((String[]) distinctSets[i].toArray(new String[distinctSets[i].size()])); } Random rand = new Random(); if (randSeed != 0) rand.setSeed(randSeed); // output with random data for (; count > 0; count--) { ArrayList<String> row = new ArrayList<String>(nColumns); for (int i = 0; i < nColumns; i++) { String[] candidates = distincts.get(i); row.add(candidates[rand.nextInt(candidates.length)]); } queue.put(row); } queue.put(new ArrayList<String>(0)); } static Map<TblColRef, Dictionary<String>> getDictionaryMap(CubeInstance cube, String flatTable) throws IOException { Map<TblColRef, Dictionary<String>> result = Maps.newHashMap(); CubeDesc desc = cube.getDescriptor(); CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich( EngineFactory.getJoinedFlatTableDesc(desc), desc); int nColumns = flatDesc.getAllColumns().size(); List<TblColRef> columns = Cuboid.getBaseCuboid(desc).getColumns(); for (int c = 0; c < columns.size(); c++) { TblColRef col = columns.get(c); if (desc.getRowkey().isUseDictionary(col)) { logger.info("Building dictionary for " + col); List<String> valueList = readValueList(flatTable, nColumns, flatDesc.getRowKeyColumnIndexes()[c]); Dictionary<String> dict = DictionaryGenerator.buildDictionary(col.getType(), new IterableDictionaryValueEnumerator(valueList)); result.put(col, dict); } } for (int measureIdx = 0; measureIdx < cube.getDescriptor().getMeasures().size(); measureIdx++) { MeasureDesc measureDesc = cube.getDescriptor().getMeasures().get(measureIdx); FunctionDesc func = measureDesc.getFunction(); List<TblColRef> dictCols = func.getMeasureType().getColumnsNeedDictionary(func); if (dictCols.isEmpty()) continue; int[] flatTableIdx = flatDesc.getMeasureColumnIndexes()[measureIdx]; List<TblColRef> paramCols = func.getParameter().getColRefs(); for (int i = 0; i < paramCols.size(); i++) { TblColRef col = paramCols.get(i); if (dictCols.contains(col)) { int colIdxOnFlat = flatTableIdx[i]; logger.info("Building dictionary for " + col); List<String> valueList = readValueList(flatTable, nColumns, colIdxOnFlat); Dictionary<String> dict = DictionaryGenerator.buildDictionary(col.getType(), new IterableDictionaryValueEnumerator(valueList)); result.put(col, dict); } } } return result; } private static List<String> readValueList(String flatTable, int nColumns, int c) throws IOException { List<String> result = Lists.newArrayList(); List<String> lines = FileUtils.readLines(new File(flatTable), "UTF-8"); for (String line : lines) { String[] row = line.trim().split(","); if (row.length != nColumns) { throw new IllegalStateException(); } if (row[c] != null) { result.add(row[c]); } } return result; } class ConsoleGTRecordWriter implements ICuboidWriter { boolean verbose = false; @Override public void write(long cuboidId, GTRecord record) throws IOException { if (verbose) System.out.println(record.toString()); } @Override public void flush() { if (verbose) { System.out.println("flush"); } } @Override public void close() { if (verbose) { System.out.println("close"); } } } }