org.apache.kylin.invertedindex.InvertedIndexLocalTest.java Source code

Java tutorial

Introduction

Here is the source code for InvertedIndexLocalTest.java, a JUnit test class in the package org.apache.kylin.invertedindex.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.invertedindex;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.Nullable;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.dict.DictionaryGenerator;
import org.apache.kylin.dict.IterableDictionaryValueEnumerator;
import org.apache.kylin.invertedindex.index.CompressedValueContainer;
import org.apache.kylin.invertedindex.index.RawTableRecord;
import org.apache.kylin.invertedindex.index.ShardingSliceBuilder;
import org.apache.kylin.invertedindex.index.Slice;
import org.apache.kylin.invertedindex.index.TableRecord;
import org.apache.kylin.invertedindex.index.TableRecordInfo;
import org.apache.kylin.invertedindex.model.IIDesc;
import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
import org.apache.kylin.invertedindex.model.IIRow;
import org.apache.kylin.metadata.model.TblColRef;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import com.google.common.base.Function;
import com.google.common.collect.Collections2;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;

/**
 * Local-metadata tests for the inverted index (II) building blocks.
 *
 * <p>The fixture loads the II named {@code test_kylin_ii_left_join}, reads the CSV file
 * {@code data/flatten_data_for_ii.csv} and builds one dictionary per column from its cells.
 * The tests then round-trip a {@link CompressedValueContainer} through its byte form and
 * round-trip slices through {@link IIKeyValueCodec}.
 */
public class InvertedIndexLocalTest extends LocalFileMetadataTestCase {

    /** II instance under test; looked up by name in {@link #setUp()}. */
    IIInstance ii;
    /** Record layout created from the II descriptor and {@link #dictionaryMap}. */
    TableRecordInfo info;
    /** Raw lines of the CSV fixture; each line becomes one record. */
    List<String> lines;
    /** Dictionaries indexed by column position, produced by {@code buildDictionary}. */
    private Dictionary<?>[] dictionaryMap;

    /**
     * Prepares test metadata, loads the II and the CSV fixture, and builds the dictionaries
     * and the {@link TableRecordInfo} shared by the tests.
     *
     * @throws Exception if the metadata, the fixture file or the dictionaries cannot be set up
     */
    @Before
    public void setUp() throws Exception {
        this.createTestMetadata();
        this.ii = IIManager.getInstance(getTestConfig()).getII("test_kylin_ii_left_join");

        File file = new File(LOCALMETA_TEST_DATA, "data/flatten_data_for_ii.csv");
        FileInputStream in = new FileInputStream(file);
        try {
            this.lines = IOUtils.readLines(in, "UTF-8");
        } finally {
            // Close even when reading fails so the file handle is never leaked.
            in.close();
        }

        // Lazily view every CSV line as its list of comma-separated cells.
        List<List<String>> table = Lists.transform(lines, new Function<String, List<String>>() {
            @Nullable
            @Override
            public List<String> apply(@Nullable String input) {
                return Lists.newArrayList(input.split(","));
            }
        });
        dictionaryMap = buildDictionary(table, ii.getDescriptor());
        this.info = new TableRecordInfo(ii.getDescriptor(), dictionaryMap);
    }

    /**
     * Removes the test metadata created in {@link #setUp()}.
     *
     * @throws Exception if the cleanup fails
     */
    @After
    public void after() throws Exception {
        this.cleanupTestMetadata();
    }

    /**
     * Appends every id of column 0's dictionary followed by the NULL id to a container,
     * copies the container through {@code toBytes}/{@code fromBytes}, and checks that the
     * copy yields the same values in the same order and equals the original.
     */
    @Test
    public void testCompressedValueContainer() {
        // create container
        CompressedValueContainer container = new CompressedValueContainer(info.getDigest(), 0, 500);
        Dictionary<String> dict = info.dict(0);
        int idSize = dict.getSizeOfId();
        int nullId = Dictionary.NULL_ID[idSize];

        // 'bytes' wraps 'buf', so each write into 'buf' is what gets appended next
        byte[] buf = new byte[idSize];
        ImmutableBytesWritable bytes = new ImmutableBytesWritable(buf);

        for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) {
            BytesUtil.writeUnsigned(v, buf, 0, idSize);
            container.append(bytes);
        }
        BytesUtil.writeUnsigned(nullId, buf, 0, idSize);
        container.append(bytes);
        container.closeForChange();

        // copy by serialization
        ImmutableBytesWritable copy = container.toBytes();
        CompressedValueContainer container2 = new CompressedValueContainer(info.getDigest(), 0, 500);
        container2.fromBytes(copy);

        // check the copy
        int i = 0;
        for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) {
            container2.getValueAt(i++, bytes);
            int value = BytesUtil.readUnsigned(bytes.get(), bytes.getOffset(), bytes.getLength());
            assertEquals(v, value);
        }
        container2.getValueAt(i++, bytes);
        int value = BytesUtil.readUnsigned(bytes.get(), bytes.getOffset(), bytes.getLength());
        assertEquals(nullId, value);
        assertEquals(container, container2);
    }

    /**
     * Builds slices from the sorted records, encodes them to key-value rows, decodes them
     * back, and checks that both the slices and the records they contain survive the round trip.
     *
     * @throws IOException if loading the records or building the slices fails
     */
    @Test
    public void testCodec() throws IOException {
        List<TableRecord> records = loadRecordsSorted();
        System.out.println(records.size() + " records");
        List<Slice> slices = buildTimeSlices(records);
        System.out.println(slices.size() + " slices");

        IIKeyValueCodec codec = new IIKeyValueCodec(info.getDigest());
        List<IIRow> kvs = encodeKVs(codec, slices);
        System.out.println(kvs.size() + " KV pairs");

        List<Slice> slicesCopy = decodeKVs(codec, kvs);
        assertEquals(slices.size(), slicesCopy.size());
        for (int i = 0; i < slices.size(); i++) {
            assertEquals(slices.get(i), slicesCopy.get(i));
        }

        // Records are compared as sets, so their order across slices does not matter.
        List<TableRecord> recordsCopy = iterateRecords(slicesCopy);
        assertEquals(new HashSet<TableRecord>(records), new HashSet<TableRecord>(recordsCopy));
        dump(recordsCopy);
    }

    /**
     * Builds one dictionary per column from the distinct cell values found in {@code table}.
     *
     * @param table rows of string cells; cell {@code i} of a row is attributed to column
     *              {@code i} of {@code desc.listAllColumns()}
     * @param desc  II descriptor supplying the column list and column positions
     * @return an array with one slot per column of {@code desc}; slots of columns that have
     *         no value in {@code table} stay {@code null}
     * @throws IOException if one of the underlying calls reports an I/O failure
     */
    private Dictionary<?>[] buildDictionary(List<List<String>> table, IIDesc desc) throws IOException {
        // Collect the distinct values seen in every column.
        SetMultimap<TblColRef, String> valueMap = HashMultimap.create();
        for (List<String> row : table) {
            for (int i = 0; i < row.size(); i++) {
                String cell = row.get(i);
                valueMap.put(desc.listAllColumns().get(i), cell);
            }
        }

        Dictionary<?>[] result = new Dictionary<?>[desc.listAllColumns().size()];
        // keySet() visits each column once; keys() would repeat a column once per stored
        // value and rebuild the very same dictionary every time.
        for (TblColRef tblColRef : valueMap.keySet()) {
            result[desc.findColumn(tblColRef)] = DictionaryGenerator.buildDictionary(tblColRef.getType(),
                    new IterableDictionaryValueEnumerator(
                            Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
                                @Nullable
                                @Override
                                public byte[] apply(String input) {
                                    try {
                                        return input.getBytes("UTF-8");
                                    } catch (UnsupportedEncodingException e) {
                                        throw new RuntimeException(e);
                                    }
                                }
                            })));
        }
        return result;
    }

    /**
     * Turns every CSV line into a {@link TableRecord} and sorts the records by ascending
     * timestamp.
     *
     * @return the records, ordered by {@code getTimestamp()}
     * @throws IOException declared for callers; see the record-creation calls
     */
    private List<TableRecord> loadRecordsSorted() throws IOException {
        List<TableRecord> records = Lists.newArrayList();
        for (String line : lines) {
            String[] fields = line.split(",");
            TableRecord rec = info.createTableRecord();
            for (int col = 0; col < fields.length; col++) {
                rec.setValueString(col, fields[col]);
            }
            records.add(rec);
        }

        Collections.sort(records, new Comparator<TableRecord>() {
            @Override
            public int compare(TableRecord a, TableRecord b) {
                // Compare directly: subtracting the timestamps could overflow and flip the sign.
                long ta = a.getTimestamp();
                long tb = b.getTimestamp();
                if (ta > tb)
                    return 1;
                else if (ta == tb)
                    return 0;
                else
                    return -1;
            }
        });

        return records;
    }

    /**
     * Feeds the records to a {@link ShardingSliceBuilder} and returns all produced slices,
     * sorted, with the test dictionaries attached to each of them.
     *
     * @param records records to append, in the order given
     * @return slices emitted while appending plus those returned by {@code close()}, sorted
     * @throws IOException if the slice builder fails
     */
    private List<Slice> buildTimeSlices(List<TableRecord> records) throws IOException {
        ShardingSliceBuilder builder = new ShardingSliceBuilder(info);
        List<Slice> slices = Lists.newArrayList();
        for (TableRecord rec : records) {
            // Assumes each shard receives fewer records than the slice size, so append()
            // is not expected to hand back a slice here; any it does return is still kept.
            Slice slice = builder.append(rec);
            if (slice != null) {
                slice.setLocalDictionaries(dictionaryMap);
                slices.add(slice);
            }
        }
        List<Slice> finals = builder.close();
        for (Slice slice : finals) {
            slice.setLocalDictionaries(dictionaryMap);
        }
        slices.addAll(finals);

        Collections.sort(slices);
        return slices;
    }

    /**
     * Encodes every slice with the codec and concatenates the resulting rows.
     *
     * @param codec  codec used for encoding
     * @param slices slices to encode, in order
     * @return all encoded rows, in slice order
     */
    private List<IIRow> encodeKVs(IIKeyValueCodec codec, List<Slice> slices) {
        List<IIRow> kvs = Lists.newArrayList();
        for (Slice slice : slices) {
            kvs.addAll(codec.encodeKeyValue(slice));
        }
        return kvs;
    }

    /**
     * Decodes the rows back into slices and collects them into a list.
     *
     * @param codec codec used for decoding
     * @param kvs   encoded rows
     * @return the decoded slices, in the order the codec yields them
     */
    private List<Slice> decodeKVs(IIKeyValueCodec codec, List<IIRow> kvs) {
        List<Slice> slices = Lists.newArrayList();
        for (Slice slice : codec.decodeKeyValue(kvs)) {
            slices.add(slice);
        }
        return slices;
    }

    /**
     * Collects the records of all slices as {@link TableRecord}s.
     *
     * @param slices slices to iterate
     * @return one table record per raw record, each wrapping a clone of the raw record
     */
    private List<TableRecord> iterateRecords(List<Slice> slices) {
        List<TableRecord> records = Lists.newArrayList();
        for (Slice slice : slices) {
            for (RawTableRecord rec : slice) {
                // Each raw record is cloned before wrapping, so the collected TableRecord
                // does not share the instance yielded by the slice iteration.
                records.add(new TableRecord((RawTableRecord) rec.clone(), info));
            }
        }
        return records;
    }

    /**
     * Prints every record to standard output, one per line.
     *
     * @param records records to print
     */
    private void dump(Iterable<TableRecord> records) {
        for (TableRecord rec : records) {
            System.out.println(rec.toString());
        }
    }

}