org.apache.kylin.provision.BuildIIWithStream.java Source code

Introduction

Here is the source code for org.apache.kylin.provision.BuildIIWithStream.java
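
In brief, the class provisions two test inverted-index (II) realizations, test_kylin_ii_left_join and test_kylin_ii_inner_join, against a Hadoop sandbox: it materializes a Hive intermediate flat table, sorts the rows by their timestamp column, groups them into StreamingBatch slices, and loads the encoded slices into HBase.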

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.provision;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.TimeZone;
import java.util.UUID;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.util.ToolRunner;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.ClassUtil;
import org.apache.kylin.common.util.DateFormat;
import org.apache.kylin.common.util.HBaseMetadataTestCase;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.common.util.StreamingBatch;
import org.apache.kylin.common.util.StreamingMessage;
import org.apache.kylin.engine.mr.JobBuilderSupport;
import org.apache.kylin.invertedindex.IIInstance;
import org.apache.kylin.invertedindex.IIManager;
import org.apache.kylin.invertedindex.IISegment;
import org.apache.kylin.invertedindex.index.Slice;
import org.apache.kylin.invertedindex.index.SliceBuilder;
import org.apache.kylin.invertedindex.model.IIDesc;
import org.apache.kylin.invertedindex.model.IIJoinedFlatTableDesc;
import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
import org.apache.kylin.invertedindex.model.IIRow;
import org.apache.kylin.job.DeployUtil;
import org.apache.kylin.job.JoinedFlatTable;
import org.apache.kylin.job.common.ShellExecutable;
import org.apache.kylin.job.constant.ExecutableConstants;
import org.apache.kylin.job.engine.JobEngineConfig;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.metadata.realization.RealizationStatusEnum;
import org.apache.kylin.source.hive.HiveCmdBuilder;
import org.apache.kylin.source.hive.HiveTableReader;
import org.apache.kylin.storage.hbase.HBaseConnection;
import org.apache.kylin.storage.hbase.ii.IICreateHTableJob;
import org.apache.kylin.storage.hbase.util.StorageCleanupJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

public class BuildIIWithStream {

    private static final Logger logger = LoggerFactory.getLogger(BuildIIWithStream.class);

    private static final String[] II_NAME = new String[] { "test_kylin_ii_left_join", "test_kylin_ii_inner_join" };
    private IIManager iiManager;
    private KylinConfig kylinConfig;

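    /**
     * Entry point: sets up the sandbox test metadata, builds both II instances,
     * then cleans up old storage. Exits with 0 on success and 1 on failure.
     */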
    public static void main(String[] args) throws Exception {
        try {
            beforeClass();
            BuildIIWithStream buildIIWithStream = new BuildIIWithStream();
            buildIIWithStream.before();
            buildIIWithStream.build();
            logger.info("Build is done");
            afterClass();
            logger.info("Going to exit");
            System.exit(0);
        } catch (Exception e) {
            logger.error("error", e);
            System.exit(1);
        }
    }

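    /**
     * Adds the sandbox test data directory to the classpath and initializes the
     * test metadata store; fails fast when the hdp.version system property is missing.
     */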
    public static void beforeClass() throws Exception {
        logger.info("Adding to classpath: " + new File(HBaseMetadataTestCase.SANDBOX_TEST_DATA).getAbsolutePath());
        ClassUtil.addClasspath(new File(HBaseMetadataTestCase.SANDBOX_TEST_DATA).getAbsolutePath());
        if (StringUtils.isEmpty(System.getProperty("hdp.version"))) {
            throw new RuntimeException(
                    "No hdp.version set; please set hdp.version as a JVM option, for example: -Dhdp.version=2.2.4.2-2");
        }
        HBaseMetadataTestCase.staticCreateTestMetadata(HBaseMetadataTestCase.SANDBOX_TEST_DATA);
    }

    protected void deployEnv() throws Exception {
        DeployUtil.overrideJobJarLocations();
    }

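    /**
     * Overrides the job jar locations for the sandbox and disables both test II
     * instances before they are rebuilt.
     */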
    public void before() throws Exception {
        deployEnv();

        kylinConfig = KylinConfig.getInstanceFromEnv();
        iiManager = IIManager.getInstance(kylinConfig);
        for (String iiInstance : II_NAME) {

            IIInstance ii = iiManager.getII(iiInstance);
            if (ii.getStatus() != RealizationStatusEnum.DISABLED) {
                ii.setStatus(RealizationStatusEnum.DISABLED);
                iiManager.updateII(ii);
            }
        }
    }

    public static void afterClass() throws Exception {
        cleanupOldStorage();
        HBaseMetadataTestCase.staticCleanupTestMetadata();
    }

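    /**
     * Generates the Hive statements (USE, DROP TABLE, CREATE TABLE, INSERT) that
     * materialize the joined flat table for the given II descriptor, executes them
     * via the CLI command executor, and returns the intermediate table name.
     */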
    private String createIntermediateTable(IIDesc desc, KylinConfig kylinConfig) throws IOException {
        IIJoinedFlatTableDesc intermediateTableDesc = new IIJoinedFlatTableDesc(desc);
        JobEngineConfig jobEngineConfig = new JobEngineConfig(kylinConfig);
        final String uuid = UUID.randomUUID().toString();
        final String useDatabaseHql = "USE " + kylinConfig.getHiveDatabaseForIntermediateTable() + ";";
        final String dropTableHql = JoinedFlatTable.generateDropTableStatement(intermediateTableDesc);
        final String createTableHql = JoinedFlatTable.generateCreateTableStatement(intermediateTableDesc,
                JobBuilderSupport.getJobWorkingDir(jobEngineConfig, uuid));
        final String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(intermediateTableDesc,
                jobEngineConfig);

        ShellExecutable step = new ShellExecutable();
        HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
        hiveCmdBuilder.addStatement(useDatabaseHql);
        hiveCmdBuilder.addStatement(dropTableHql);
        hiveCmdBuilder.addStatement(createTableHql);
        hiveCmdBuilder.addStatement(insertDataHqls);

        step.setCmd(hiveCmdBuilder.build());
        logger.info(step.getCmd());
        step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
        kylinConfig.getCliCommandExecutor().execute(step.getCmd(), null);
        return intermediateTableDesc.getTableName();
    }

    private void clearSegment(String iiName) throws Exception {
        IIInstance ii = iiManager.getII(iiName);
        ii.getSegments().clear();
        iiManager.updateII(ii);
    }

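    /**
     * Drops any existing segments of the II, then creates a fresh segment covering
     * the epoch up to 2015-01-01 (GMT).
     */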
    private IISegment createSegment(String iiName) throws Exception {
        clearSegment(iiName);
        SimpleDateFormat f = new SimpleDateFormat("yyyy-MM-dd");
        f.setTimeZone(TimeZone.getTimeZone("GMT"));

        long date1 = 0;
        long date2 = f.parse("2015-01-01").getTime();
        return buildSegment(iiName, date1, date2);
    }

    private IISegment buildSegment(String iiName, long startDate, long endDate) throws Exception {
        IIInstance iiInstance = iiManager.getII(iiName);
        IISegment segment = iiManager.buildSegment(iiInstance, startDate, endDate);
        iiInstance.getSegments().add(segment);
        iiManager.updateII(iiInstance);
        return segment;
    }

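    /**
     * End-to-end build of one II: materializes the Hive flat table, creates the
     * target HTable, sorts all rows by timestamp, and streams them into HBase one
     * slice at a time, flushing the trailing partial slice at the end.
     */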
    private void buildII(String iiName) throws Exception {
        final IIDesc desc = iiManager.getII(iiName).getDescriptor();
        final String tableName = createIntermediateTable(desc, kylinConfig);
        logger.info("intermediate table name:" + tableName);

        HiveTableReader reader = new HiveTableReader("default", tableName);
        final List<TblColRef> tblColRefs = desc.listAllColumns();
        for (TblColRef tblColRef : tblColRefs) {
            if (desc.isMetricsCol(tblColRef)) {
                logger.info("metrics:" + tblColRef.getName());
            } else {
                logger.info("dimension:" + tblColRef.getName());
            }
        }
        final IISegment segment = createSegment(iiName);
        final HTableInterface htable = HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl())
                .getTable(segment.getStorageLocationIdentifier());
        String[] args = new String[] { "-iiname", iiName, "-htablename", segment.getStorageLocationIdentifier() };
        ToolRunner.run(new IICreateHTableJob(), args);

        final IIDesc iiDesc = segment.getIIDesc();
        final SliceBuilder sliceBuilder = new SliceBuilder(desc, (short) 0);

        List<String[]> sorted = getSortedRows(reader, desc.getTimestampColumn());
        int count = sorted.size();
        ArrayList<StreamingMessage> messages = Lists.newArrayList();
        for (String[] row : sorted) {
            messages.add(parse(row));
            if (messages.size() >= iiDesc.getSliceSize()) {
                build(sliceBuilder, new StreamingBatch(messages,
                        Pair.newPair(System.currentTimeMillis(), System.currentTimeMillis())), htable);
                messages.clear();
            }
        }

        if (!messages.isEmpty()) {
            build(sliceBuilder, new StreamingBatch(messages,
                    Pair.newPair(System.currentTimeMillis(), System.currentTimeMillis())), htable);
        }

        reader.close();
        logger.info("total record count:" + count + " htable:" + segment.getStorageLocationIdentifier());
        logger.info("stream build finished, htable name:" + segment.getStorageLocationIdentifier());
    }

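    /**
     * Builds every configured II and marks it READY once its build succeeds.
     */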
    public void build() throws Exception {
        for (String iiName : II_NAME) {
            buildII(iiName);
            IIInstance ii = iiManager.getII(iiName);
            if (ii.getStatus() != RealizationStatusEnum.READY) {
                ii.setStatus(RealizationStatusEnum.READY);
                iiManager.updateII(ii);
            }
        }
    }

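    /** Builds one slice from the batch and writes it to the segment's HTable. */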
    private void build(SliceBuilder sliceBuilder, StreamingBatch batch, HTableInterface htable) throws IOException {
        final Slice slice = sliceBuilder.buildSlice(batch);
        loadToHBase(htable, slice, new IIKeyValueCodec(slice.getInfo()));
    }

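    /**
     * Encodes the slice into IIRow key/values and writes them as HBase Puts,
     * storing each value and its dictionary under the II column family.
     */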
    private void loadToHBase(HTableInterface hTable, Slice slice, IIKeyValueCodec codec) throws IOException {
        List<Put> data = Lists.newArrayList();
        for (IIRow row : codec.encodeKeyValue(slice)) {
            final byte[] key = row.getKey().get();
            final byte[] value = row.getValue().get();
            Put put = new Put(key);
            put.add(IIDesc.HBASE_FAMILY_BYTES, IIDesc.HBASE_QUALIFIER_BYTES, value);
            final ImmutableBytesWritable dictionary = row.getDictionary();
            final byte[] dictBytes = dictionary.get();
            if (dictionary.getOffset() == 0 && dictionary.getLength() == dictBytes.length) {
                put.add(IIDesc.HBASE_FAMILY_BYTES, IIDesc.HBASE_DICTIONARY_BYTES, dictBytes);
            } else {
                throw new RuntimeException("dict offset should be 0, and dict length should be " + dictBytes.length
                        + ", but they are " + dictionary.getOffset() + " and " + dictionary.getLength());
            }
            data.add(put);
        }
        hTable.put(data);
        // no hTable.flushCommits() needed: the table is in auto-flush mode
    }

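    /** Wraps a raw row in a StreamingMessage stamped with the current time. */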
    private StreamingMessage parse(String[] row) {
        return new StreamingMessage(Lists.newArrayList(row), System.currentTimeMillis(), System.currentTimeMillis(),
                Collections.<String, Object>emptyMap());
    }

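    /**
     * Reads all rows from the Hive table into memory and sorts them ascending by
     * the timestamp column before slicing.
     */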
    private List<String[]> getSortedRows(HiveTableReader reader, final int tsCol) throws IOException {
        List<String[]> unsorted = Lists.newArrayList();
        while (reader.next()) {
            unsorted.add(reader.getRow());
        }
        Collections.sort(unsorted, new Comparator<String[]>() {
            @Override
            public int compare(String[] o1, String[] o2) {
                long t1 = DateFormat.stringToMillis(o1[tsCol]);
                long t2 = DateFormat.stringToMillis(o2[tsCol]);
                return Long.compare(t1, t2);
            }
        });
        return unsorted;
    }

    private static int cleanupOldStorage() throws Exception {
        String[] args = { "--delete", "true" };
        return ToolRunner.run(new StorageCleanupJob(), args);
    }

}
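
Example

The batching loop in buildII() follows a common accumulate-and-flush pattern: rows are buffered until the configured slice size is reached, the buffer is flushed as one batch, and whatever remains is flushed after the loop so no trailing rows are lost. Below is a minimal, self-contained sketch of that pattern; the names BatchFlushSketch, flush and SLICE_SIZE are illustrative stand-ins, not part of the Kylin API.

import java.util.ArrayList;
import java.util.List;

public class BatchFlushSketch {

    private static final int SLICE_SIZE = 3; // stands in for IIDesc.getSliceSize()

    public static void main(String[] args) {
        List<String> buffer = new ArrayList<String>();
        for (String row : new String[] { "a", "b", "c", "d", "e" }) {
            buffer.add(row);
            if (buffer.size() >= SLICE_SIZE) {
                flush(buffer); // analogous to build(sliceBuilder, batch, htable)
                buffer.clear();
            }
        }
        if (!buffer.isEmpty()) { // flush the trailing partial batch
            flush(buffer);
        }
    }

    private static void flush(List<String> batch) {
        System.out.println("flushing batch of " + batch.size() + ": " + batch);
    }
}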