jp.co.cyberagent.parquet.msgpack.compat.TestParquetThriftCompatibility.java Source code

Introduction

Here is the source code for jp.co.cyberagent.parquet.msgpack.compat.TestParquetThriftCompatibility.java. It is a JUnit test that reads a parquet-thrift file generated by Apache Spark (test-data/spark/parquet-thrift-compat.snappy.parquet) with ParquetIterator and asserts that each row comes back as the expected MessagePack Value.

Source

/*
 * This class includes code from Apache Spark.
 *
 * Copyright 2017 CyberAgent, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package jp.co.cyberagent.parquet.msgpack.compat;

import jp.co.cyberagent.parquet.msgpack.ParquetIterator;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import org.msgpack.value.Value;

import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import static org.msgpack.value.ValueFactory.newArray;
import static org.msgpack.value.ValueFactory.newBoolean;
import static org.msgpack.value.ValueFactory.newFloat;
import static org.msgpack.value.ValueFactory.newInteger;
import static org.msgpack.value.ValueFactory.newMap;
import static org.msgpack.value.ValueFactory.newNil;
import static org.msgpack.value.ValueFactory.newString;

public class TestParquetThriftCompatibility {
    @Test
    public void testing() {
        ParquetIterator parquet = ParquetIterator
                .fromResource("test-data/spark/parquet-thrift-compat.snappy.parquet");

        String[] suits = new String[] { "SPADES", "HEARTS", "DIAMONDS", "CLUBS" };
        for (int i = 0; i < 10; i++) {
            HashMap<Value, Value> nonNullablePrimitiveValues = new HashMap<>();
            {
                HashMap<Value, Value> m = nonNullablePrimitiveValues;
                m.put(newString("boolColumn"), newBoolean(i % 2 == 0));
                m.put(newString("byteColumn"), newInteger(i));
                m.put(newString("shortColumn"), newInteger(i + 1));
                m.put(newString("intColumn"), newInteger(i + 2));
                m.put(newString("longColumn"), newInteger(i * 10));
                m.put(newString("doubleColumn"), newFloat(i + 0.2));
                // Thrift `BINARY` values are actually unencoded `STRING` values, and thus are always
                // treated as `BINARY (UTF8)` in parquet-thrift, since parquet-thrift always assumes
                // Thrift `STRING`s are encoded using UTF-8.
                m.put(newString("binaryColumn"), newString("val_" + i));
                m.put(newString("stringColumn"), newString("val_" + i));
                // Thrift ENUM values are converted to Parquet binaries containing UTF-8 strings
                m.put(newString("enumColumn"), newString(suits[i % 4]));
            }

            HashMap<Value, Value> nullablePrimitiveValues = new HashMap<>();
            for (Map.Entry<Value, Value> entry : nonNullablePrimitiveValues.entrySet()) {
                Value key = newString("maybe" + StringUtils.capitalize(entry.getKey().toString()));
                Value value = (i % 3 == 0) ? newNil() : entry.getValue();
                nullablePrimitiveValues.put(key, value);
            }

            HashMap<Value, Value> complexValues = new HashMap<>();
            {
                HashMap<Value, Value> m = complexValues;
                m.put(newString("stringsColumn"),
                        newArray(newString("arr_" + i), newString("arr_" + (i + 1)), newString("arr_" + (i + 2))));
                // Thrift `SET`s are converted to Parquet `LIST`s
                m.put(newString("intSetColumn"), newArray(newInteger(i)));
                m.put(newString("intToStringColumn"),
                        newMap(newInteger(i), newString("val_" + i), newInteger(i + 1), newString("val_" + (i + 1)),
                                newInteger(i + 2), newString("val_" + (i + 2))));

                m.put(newString("complexColumn"), newMap(newInteger(i + 0), newComplexInnerValue(i),
                        newInteger(i + 1), newComplexInnerValue(i), newInteger(i + 2), newComplexInnerValue(i)));
            }

            HashMap<Value, Value> row = new HashMap<>();
            row.putAll(nonNullablePrimitiveValues);
            row.putAll(nullablePrimitiveValues);
            row.putAll(complexValues);

            Value expected = newMap(row);
            Value actual = parquet.next();
            assertThat(actual, is(expected));
        }
    }

    private Value newComplexInnerValue(int i) {
        return newArray(
                newMap(newString("nestedIntsColumn"),
                        newArray(newInteger(i + 0 + 0), newInteger(i + 0 + 1), newInteger(i + 0 + 2)),
                        newString("nestedStringColumn"), newString("val_" + (i + 0))),
                newMap(newString("nestedIntsColumn"),
                        newArray(newInteger(i + 1 + 0), newInteger(i + 1 + 1), newInteger(i + 1 + 2)),
                        newString("nestedStringColumn"), newString("val_" + (i + 1))),
                newMap(newString("nestedIntsColumn"),
                        newArray(newInteger(i + 2 + 0), newInteger(i + 2 + 1), newInteger(i + 2 + 2)),
                        newString("nestedStringColumn"), newString("val_" + (i + 2))));
    }
}
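
For reference, here is a minimal standalone sketch of the same read pattern the test exercises. It is not part of the original file: the class name DumpParquetThriftCompat and its main method are illustrative only, and the sketch deliberately uses just ParquetIterator.fromResource(...) and next(), the two methods the test above demonstrates. The row count of 10 is assumed from the test's loop, not from any documented API.

package jp.co.cyberagent.parquet.msgpack.compat;

import jp.co.cyberagent.parquet.msgpack.ParquetIterator;
import org.msgpack.value.Value;

public class DumpParquetThriftCompat {
    public static void main(String[] args) {
        // Open the same Spark-generated fixture the test reads from the classpath.
        ParquetIterator parquet = ParquetIterator
                .fromResource("test-data/spark/parquet-thrift-compat.snappy.parquet");

        // The test above expects exactly 10 rows, so read that many.
        for (int i = 0; i < 10; i++) {
            Value row = parquet.next(); // each row decodes to a MessagePack map value
            System.out.println(row);    // Value provides a human-readable toString()
        }
    }
}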