org.apache.kylin.cube.inmemcubing.InMemCubeBuilderTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.kylin.cube.inmemcubing.InMemCubeBuilderTest.java

Source

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements. See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License. You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.apache.kylin.cube.inmemcubing;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.io.FileUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
import org.apache.kylin.dict.DictionaryGenerator;
import org.apache.kylin.dict.IterableDictionaryValueEnumerator;
import org.apache.kylin.dimension.Dictionary;
import org.apache.kylin.gridtable.GTRecord;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * Exercises {@link InMemCubeBuilder} against cubes from the local test metadata.
 *
 * <p>Each test builds the dictionaries a cube needs from a flat-table CSV file, then runs the
 * builder several times on a background thread while the test thread feeds it rows whose column
 * values are picked at random from the distinct values found in that CSV file.
 */
public class InMemCubeBuilderTest extends LocalFileMetadataTestCase {

    private static final Logger logger = LoggerFactory.getLogger(InMemCubeBuilderTest.class);

    // State of the build currently under test; populated by testBuild().
    private CubeInstance cube;
    private String flatTable;
    private Map<TblColRef, Dictionary<String>> dictionaryMap;

    private int nInpRows;
    private int nThreads;

    @Before
    public void before() throws IOException {
        createTestMetadata();
    }

    @After
    public void after() throws Exception {
        cleanupTestMetadata();
    }

    @Test
    public void testKylinCube() throws Exception {
        testBuild("test_kylin_cube_without_slr_left_join_empty", //
                "../examples/test_case_data/localmeta/data/flatten_data_for_without_slr_left_join.csv", 70000, 4);
    }

    @Test
    public void testSSBCube() throws Exception {
        testBuild("ssb", //
                "../examples/test_case_data/localmeta/data/kylin_intermediate_ssb_19920101000000_19920201000000.csv",
                1000, 1);
    }

    /**
     * Looks up the named cube, builds its dictionaries from the flat table and runs the build rounds.
     *
     * @param cubeName  name of the cube to fetch from the {@link CubeManager}
     * @param flatTable path of the comma-separated flat-table file
     * @param nInpRows  number of random rows to feed in each non-empty round
     * @param nThreads  value passed to the builder's {@code setConcurrentThreads}
     * @throws Exception if dictionary building or any build round fails
     */
    public void testBuild(String cubeName, String flatTable, int nInpRows, int nThreads) throws Exception {
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeManager = CubeManager.getInstance(kylinConfig);

        this.nInpRows = nInpRows;
        this.nThreads = nThreads;

        this.cube = cubeManager.getCube(cubeName);
        this.flatTable = flatTable;
        this.dictionaryMap = getDictionaryMap(cube, flatTable);

        testBuildInner();
    }

    /**
     * Runs three consecutive builds on the same builder instance: a full round, a round with zero
     * input rows, and another full round.
     *
     * @throws IOException wrapping any failure raised while feeding data or building
     */
    private void testBuildInner() throws Exception {

        InMemCubeBuilder cubeBuilder = new InMemCubeBuilder(cube.getDescriptor(), dictionaryMap);
        //DoggedCubeBuilder cubeBuilder = new DoggedCubeBuilder(cube.getDescriptor(), dictionaryMap);
        cubeBuilder.setConcurrentThreads(nThreads);

        ArrayBlockingQueue<List<String>> queue = new ArrayBlockingQueue<List<String>>(1000);
        ExecutorService executorService = Executors.newSingleThreadExecutor();

        try {
            // round 1
            runBuildRound(cubeBuilder, executorService, queue, nInpRows);

            // round 2, zero input
            runBuildRound(cubeBuilder, executorService, queue, 0);

            // round 3
            runBuildRound(cubeBuilder, executorService, queue, nInpRows);

        } catch (Exception e) {
            logger.error("stream build failed", e);
            throw new IOException("Failed to build cube ", e);
        } finally {
            // Always release the worker thread. shutdownNow() (rather than shutdown()) also
            // interrupts a build task that is still waiting on the queue because feeding failed.
            executorService.shutdownNow();
        }
    }

    /**
     * Submits one build task to the executor, feeds it {@code rowCount} rows and waits for the
     * task to finish.
     */
    private void runBuildRound(InMemCubeBuilder cubeBuilder, ExecutorService executorService,
            ArrayBlockingQueue<List<String>> queue, int rowCount) throws Exception {
        Future<?> future = executorService.submit(cubeBuilder.buildAsRunnable(queue, new ConsoleGTRecordWriter()));
        feedData(cube, flatTable, queue, rowCount);
        future.get();
    }

    /**
     * Feeds {@code count} random rows using a seed of {@code 0}, i.e. without fixing the random seed.
     *
     * @see #feedData(CubeInstance, String, ArrayBlockingQueue, int, long)
     */
    static void feedData(final CubeInstance cube, final String flatTable, ArrayBlockingQueue<List<String>> queue,
            int count) throws IOException, InterruptedException {
        feedData(cube, flatTable, queue, count, 0);
    }

    /**
     * Puts {@code count} generated rows on the queue, followed by one empty row.
     *
     * <p>Every generated row has one value per flat-table column; each value is chosen at random
     * from the distinct values that column takes in the flat-table file.
     *
     * @param cube      cube whose descriptor determines the number of flat-table columns
     * @param flatTable path of the comma-separated flat-table file, read as UTF-8
     * @param queue     destination queue; {@code put} blocks while it is full
     * @param count     number of data rows to generate; values {@code <= 0} generate none
     * @param randSeed  seed for the random generator; {@code 0} leaves the generator unseeded
     * @throws IOException          if the flat-table file cannot be read
     * @throws InterruptedException if interrupted while waiting for queue space
     */
    static void feedData(final CubeInstance cube, final String flatTable, ArrayBlockingQueue<List<String>> queue,
            int count, long randSeed) throws IOException, InterruptedException {
        CubeJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), null);
        int nColumns = flatTableDesc.getColumnList().size();

        @SuppressWarnings("unchecked")
        Set<String>[] distinctSets = new Set[nColumns];
        for (int i = 0; i < nColumns; i++) {
            distinctSets[i] = new TreeSet<String>();
        }

        // get distinct values on each column
        List<String> lines = FileUtils.readLines(new File(flatTable), "UTF-8");
        for (String line : lines) {
            String[] row = line.trim().split(",");
            assert row.length == nColumns;
            for (int i = 0; i < nColumns; i++) {
                distinctSets[i].add(row[i]);
            }
        }

        List<String[]> distincts = new ArrayList<String[]>(nColumns);
        for (int i = 0; i < nColumns; i++) {
            distincts.add(distinctSets[i].toArray(new String[distinctSets[i].size()]));
        }

        Random rand = new Random();
        if (randSeed != 0) {
            rand.setSeed(randSeed);
        }

        // output with random data
        for (; count > 0; count--) {
            List<String> row = new ArrayList<String>(nColumns);
            for (int i = 0; i < nColumns; i++) {
                String[] candidates = distincts.get(i);
                row.add(candidates[rand.nextInt(candidates.length)]);
            }
            queue.put(row);
        }
        // An empty row always goes last; presumably the builder treats it as the
        // end-of-input marker -- confirm against InMemCubeBuilder.
        queue.put(new ArrayList<String>(0));
    }

    /**
     * Builds the dictionaries for a cube from the values found in the flat-table file.
     *
     * <p>A dictionary is built for every base-cuboid column whose row key is flagged as using a
     * dictionary, and for every measure parameter column that the measure type reports as
     * needing one.
     *
     * @param cube      cube whose descriptor lists the columns and measures
     * @param flatTable path of the comma-separated flat-table file
     * @return a map from column to its dictionary; empty when no column needs one
     * @throws IOException if the flat-table file cannot be read or a dictionary cannot be built
     */
    static Map<TblColRef, Dictionary<String>> getDictionaryMap(CubeInstance cube, String flatTable)
            throws IOException {
        Map<TblColRef, Dictionary<String>> result = Maps.newHashMap();
        CubeDesc desc = cube.getDescriptor();
        CubeJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(desc, null);
        int nColumns = flatTableDesc.getColumnList().size();

        // dictionaries for row key columns
        List<TblColRef> columns = Cuboid.getBaseCuboid(desc).getColumns();
        for (int c = 0; c < columns.size(); c++) {
            TblColRef col = columns.get(c);
            if (desc.getRowkey().isUseDictionary(col)) {
                int colIdxOnFlat = flatTableDesc.getRowKeyColumnIndexes()[c];
                result.put(col, buildDictionary(col, flatTable, nColumns, colIdxOnFlat));
            }
        }

        // dictionaries for measure parameter columns
        List<MeasureDesc> measures = desc.getMeasures();
        for (int measureIdx = 0; measureIdx < measures.size(); measureIdx++) {
            FunctionDesc func = measures.get(measureIdx).getFunction();
            List<TblColRef> dictCols = func.getMeasureType().getColumnsNeedDictionary(func);
            if (dictCols.isEmpty()) {
                continue;
            }

            int[] flatTableIdx = flatTableDesc.getMeasureColumnIndexes()[measureIdx];
            List<TblColRef> paramCols = func.getParameter().getColRefs();
            for (int i = 0; i < paramCols.size(); i++) {
                TblColRef col = paramCols.get(i);
                if (dictCols.contains(col)) {
                    result.put(col, buildDictionary(col, flatTable, nColumns, flatTableIdx[i]));
                }
            }
        }

        return result;
    }

    /** Builds the dictionary of one column from the values at {@code colIdxOnFlat} in the flat table. */
    private static Dictionary<String> buildDictionary(TblColRef col, String flatTable, int nColumns,
            int colIdxOnFlat) throws IOException {
        logger.info("Building dictionary for " + col);
        List<byte[]> valueList = readValueList(flatTable, nColumns, colIdxOnFlat);
        return DictionaryGenerator.buildDictionaryFromValueEnumerator(col.getType(),
                new IterableDictionaryValueEnumerator(valueList));
    }

    /**
     * Reads the values of a single column from the flat-table file.
     *
     * @param flatTable path of the comma-separated flat-table file, read as UTF-8
     * @param nColumns  number of columns every line must split into
     * @param c         zero-based index of the column to extract
     * @return the column's values, one per line, as byte arrays
     * @throws IOException           if the file cannot be read
     * @throws IllegalStateException if a line does not split into exactly {@code nColumns} fields
     */
    private static List<byte[]> readValueList(String flatTable, int nColumns, int c) throws IOException {
        List<byte[]> result = Lists.newArrayList();
        List<String> lines = FileUtils.readLines(new File(flatTable), "UTF-8");
        for (String line : lines) {
            String[] row = line.trim().split(",");
            if (row.length != nColumns) {
                throw new IllegalStateException("Expected " + nColumns + " columns but found " + row.length
                        + " in " + flatTable + ", line: " + line);
            }
            // String.split never produces null elements, so every field is added as-is.
            result.add(Bytes.toBytes(row[c]));
        }
        return result;
    }

    /**
     * Cuboid writer that discards its input unless {@link #verbose} is set, in which case records
     * and flush/close notifications are printed to standard output.
     */
    class ConsoleGTRecordWriter implements ICuboidWriter {

        // Off by default so the tests do not flood the console.
        boolean verbose = false;

        @Override
        public void write(long cuboidId, GTRecord record) throws IOException {
            if (verbose) {
                System.out.println(record.toString());
            }
        }

        @Override
        public void flush() {
            if (verbose) {
                System.out.println("flush");
            }
        }

        @Override
        public void close() {
            if (verbose) {
                System.out.println("close");
            }
        }
    }
}