org.apache.parquet.thrift.TestProtocolReadToWrite.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.parquet.thrift.TestProtocolReadToWrite.java

Source

/* 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.thrift;

import com.twitter.data.proto.tutorial.thrift.*;
import com.twitter.elephantbird.thrift.test.TestMapInSet;
import org.apache.thrift.TBase;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TCompactProtocol;
import org.apache.thrift.protocol.TField;
import org.apache.thrift.transport.TIOStreamTransport;
import org.junit.Test;
import org.apache.parquet.thrift.test.Phone;
import org.apache.parquet.thrift.test.StructWithExtraField;
import org.apache.parquet.thrift.test.StructWithIndexStartsFrom4;
import org.apache.parquet.thrift.test.compat.*;
import thrift.test.OneOfEach;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.*;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * Tests for the {@link ProtocolPipe} implementations {@link ProtocolReadToWrite} and
 * {@link BufferedProtocolReadToWrite}.
 *
 * <p>Each test serializes a thrift object with {@link TCompactProtocol}, pipes the bytes through
 * {@code readOne}, and then checks either that the copied record equals the original or that the
 * expected error / field-ignored notifications were produced.
 */
public class TestProtocolReadToWrite {

    /** Round-trips a struct that carries one field of most primitive thrift types. */
    @Test
    public void testOneOfEach() throws Exception {
        writeReadCompare(newOneOfEach());
    }

    /** Round-trips an address book holding nested structs, lists and an enum value. */
    @Test
    public void testWriteRead() throws Exception {
        List<Person> persons = new ArrayList<Person>();
        final PhoneNumber homePhone = new PhoneNumber("555 999 9998");
        homePhone.type = PhoneType.HOME;
        persons.add(new Person(new Name("Bob", "Roberts"), 1, "bob@roberts.com",
                Arrays.asList(new PhoneNumber("555 999 9999"), homePhone)));
        persons.add(new Person(new Name("Dick", "Richardson"), 2, "dick@richardson.com",
                Arrays.asList(new PhoneNumber("555 999 9997"), new PhoneNumber("555 999 9996"))));
        writeReadCompare(new AddressBook(persons));
    }

    /** Round-trips a struct on which no field has been set. */
    @Test
    public void testEmptyStruct() throws Exception {
        writeReadCompare(new AddressBook());
    }

    /** Round-trips a struct whose field is a set containing a map. */
    @Test
    public void testMapSet() throws Exception {
        final Map<String, String> map = new HashMap<String, String>();
        map.put("foo", "bar");
        final Set<Map<String, String>> set = new HashSet<Map<String, String>>();
        set.add(map);
        writeReadCompare(new TestMapInSet("top", set));
    }

    /**
     * Serializes {@code a}, copies the bytes through every pipe implementation, deserializes the
     * copy into a fresh instance of the same class and asserts that it equals {@code a}.
     *
     * @param a the thrift object to round-trip; its class needs an accessible no-arg constructor
     * @throws Exception if (de)serialization fails or the fresh instance cannot be created
     */
    private void writeReadCompare(TBase<?, ?> a) throws Exception {
        // getClass() is typed against the erasure of TBase<?, ?>, so the cast is unchecked for the
        // compiler; the runtime class of a is by construction a TBase implementation.
        @SuppressWarnings("unchecked")
        Class<TBase<?, ?>> recordClass = (Class<TBase<?, ?>>) a.getClass();

        ProtocolPipe[] pipes = {
                new ProtocolReadToWrite(),
                new BufferedProtocolReadToWrite(new ThriftSchemaConverter().toStructType(recordClass)) };

        for (ProtocolPipe p : pipes) {
            final ByteArrayOutputStream in = new ByteArrayOutputStream();
            final ByteArrayOutputStream out = new ByteArrayOutputStream();
            a.write(protocol(in));
            p.readOne(protocol(new ByteArrayInputStream(in.toByteArray())), protocol(out));

            // Class.newInstance() is deprecated; go through the no-arg constructor instead.
            TBase<?, ?> b = recordClass.getDeclaredConstructor().newInstance();
            b.read(protocol(new ByteArrayInputStream(out.toByteArray())));

            // The pipe's simple class name is the failure message so the failing pipe is identifiable.
            assertEquals(p.getClass().getSimpleName(), a, b);
        }
    }

    /**
     * Reads a {@code OneOfEach} record with a pipe built for the {@code AddressBook} schema and
     * expects a {@code SkippableException} caused by a {@code DecodingSchemaMismatchException}.
     */
    @Test
    public void testIncompatibleSchemaRecord() throws Exception {
        // The mismatch must surface as an exception; the handler counters are expected to stay at 0.
        CountingErrorHandler countingHandler = new CountingErrorHandler();

        BufferedProtocolReadToWrite p = new BufferedProtocolReadToWrite(
                new ThriftSchemaConverter().toStructType(AddressBook.class), countingHandler);

        final ByteArrayOutputStream in = new ByteArrayOutputStream();
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        newOneOfEach().write(protocol(in));

        Throwable cause = readOneExpectingSkip(p, new ByteArrayInputStream(in.toByteArray()), out);
        assertTrue(cause instanceof DecodingSchemaMismatchException);
        assertTrue(cause.getMessage().contains("the data type does not match the expected thrift structure"));
        assertTrue(cause.getMessage().contains("got BOOL"));

        assertEquals(0, countingHandler.recordCountOfMissingFields);
        assertEquals(0, countingHandler.fieldIgnoredCount);
    }

    /**
     * Writes one record using a union member known to the read schema followed by one using a
     * member that only exists in {@code UnionV2}; the first must be read, the second must be
     * rejected with a {@code DecodingSchemaMismatchException}.
     */
    @Test
    public void testUnrecognizedUnionMemberSchema() throws Exception {
        CountingErrorHandler countingHandler = new CountingErrorHandler();
        BufferedProtocolReadToWrite p = new BufferedProtocolReadToWrite(
                new ThriftSchemaConverter().toStructType(StructWithUnionV1.class), countingHandler);
        final ByteArrayOutputStream in = new ByteArrayOutputStream();
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        StructWithUnionV1 validUnion = new StructWithUnionV1("a valid struct", UnionV1.aLong(new ALong(17L)));
        StructWithUnionV2 invalidUnion = new StructWithUnionV2("a struct with new union member",
                UnionV2.aNewBool(new ABool(true)));

        validUnion.write(protocol(in));
        invalidUnion.write(protocol(in));

        // Both records are consumed one after the other from the same stream.
        ByteArrayInputStream records = new ByteArrayInputStream(in.toByteArray());

        // first one should not throw
        p.readOne(protocol(records), protocol(out));

        Throwable cause = readOneExpectingSkip(p, records, out);
        assertEquals(DecodingSchemaMismatchException.class, cause.getClass());
        assertTrue(cause.getMessage().startsWith("Unrecognized union member with id: 3 for struct:"));

        assertEquals(0, countingHandler.recordCountOfMissingFields);
        assertEquals(0, countingHandler.fieldIgnoredCount);
    }

    /**
     * Feeds the pipe records shaped like {@code StructWithUnionV2} whose union-like member has
     * either no value set or two values set; both must be rejected with a
     * {@code DecodingSchemaMismatchException}, while a valid record preceding each is read.
     */
    @Test
    public void testUnionWithExtraOrNoValues() throws Exception {
        CountingErrorHandler countingHandler = new CountingErrorHandler();
        BufferedProtocolReadToWrite p = new BufferedProtocolReadToWrite(
                new ThriftSchemaConverter().toStructType(StructWithUnionV2.class), countingHandler);
        ByteArrayOutputStream in = new ByteArrayOutputStream();
        final ByteArrayOutputStream out = new ByteArrayOutputStream();

        StructWithUnionV2 validUnion = new StructWithUnionV2("a valid struct", UnionV2.aLong(new ALong(17L)));

        StructWithAStructThatLooksLikeUnionV2 allMissing = new StructWithAStructThatLooksLikeUnionV2("all missing",
                new AStructThatLooksLikeUnionV2());

        AStructThatLooksLikeUnionV2 extra = new AStructThatLooksLikeUnionV2();
        extra.setALong(new ALong(18L));
        extra.setANewBool(new ABool(false));

        StructWithAStructThatLooksLikeUnionV2 hasExtra = new StructWithAStructThatLooksLikeUnionV2("has extra",
                new AStructThatLooksLikeUnionV2(extra));

        // Case 1: a valid record followed by a record whose union has no member set.
        validUnion.write(protocol(in));
        allMissing.write(protocol(in));

        ByteArrayInputStream records = new ByteArrayInputStream(in.toByteArray());

        // first one should not throw
        p.readOne(protocol(records), protocol(out));

        Throwable cause = readOneExpectingSkip(p, records, out);
        assertEquals(DecodingSchemaMismatchException.class, cause.getClass());
        assertTrue(cause.getMessage().startsWith("Cannot write a TUnion with no set value in"));

        assertEquals(0, countingHandler.recordCountOfMissingFields);
        assertEquals(0, countingHandler.fieldIgnoredCount);

        // Case 2: a valid record followed by a record whose union has two members set.
        in = new ByteArrayOutputStream();
        validUnion.write(protocol(in));
        hasExtra.write(protocol(in));

        records = new ByteArrayInputStream(in.toByteArray());

        // first one should not throw
        p.readOne(protocol(records), protocol(out));

        cause = readOneExpectingSkip(p, records, out);
        assertEquals(DecodingSchemaMismatchException.class, cause.getClass());
        assertTrue(cause.getMessage().startsWith("Cannot write a TUnion with more than 1 set value in"));

        assertEquals(0, countingHandler.recordCountOfMissingFields);
        assertEquals(0, countingHandler.fieldIgnoredCount);
    }

    /**
     * When an enum value in the data has an index that is undefined in the read schema, the
     * record is considered corrupted and is skipped.
     *
     * @throws Exception if writing the fixtures or reading the valid record fails
     */
    @Test
    public void testEnumMissingSchema() throws Exception {
        CountingErrorHandler countingHandler = new CountingErrorHandler();
        BufferedProtocolReadToWrite p = new BufferedProtocolReadToWrite(
                new ThriftSchemaConverter().toStructType(StructWithEnum.class), countingHandler);
        final ByteArrayOutputStream in = new ByteArrayOutputStream();
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        StructWithMoreEnum enumDefinedInOldDefinition = new StructWithMoreEnum(NumberEnumWithMoreValue.THREE);
        StructWithMoreEnum extraEnumDefinedInNewDefinition = new StructWithMoreEnum(NumberEnumWithMoreValue.FOUR);
        enumDefinedInOldDefinition.write(protocol(in));
        extraEnumDefinedInNewDefinition.write(protocol(in));

        ByteArrayInputStream records = new ByteArrayInputStream(in.toByteArray());

        // first should not throw
        p.readOne(protocol(records), protocol(out));

        Throwable cause = readOneExpectingSkip(p, records, out);
        assertEquals(DecodingSchemaMismatchException.class, cause.getClass());
        assertTrue(cause.getMessage().contains("can not find index 4 in enum"));

        assertEquals(0, countingHandler.recordCountOfMissingFields);
        assertEquals(0, countingHandler.fieldIgnoredCount);
    }

    /**
     * When the data contains an extra field, the handler must be notified and the record must be
     * copied with the extra field dropped.
     *
     * @throws Exception if (de)serialization fails
     */
    @Test
    public void testMissingFieldHandling() throws Exception {

        CountingErrorHandler countingHandler = new CountingErrorHandler() {
            @Override
            public void handleFieldIgnored(TField field) {
                // JUnit convention: expected value first, actual value second.
                assertEquals(4, field.id);
                fieldIgnoredCount++;
            }
        };
        BufferedProtocolReadToWrite structForRead = new BufferedProtocolReadToWrite(
                new ThriftSchemaConverter().toStructType(StructV3.class), countingHandler);

        //Data has an extra field of type struct
        final ByteArrayOutputStream in = new ByteArrayOutputStream();
        StructV4WithExtracStructField dataWithNewSchema = new StructV4WithExtracStructField("name");
        dataWithNewSchema.setAge("10");
        dataWithNewSchema.setGender("male");
        StructV3 structV3 = new StructV3("name");
        structV3.setAge("10");
        dataWithNewSchema.setAddedStruct(structV3);
        dataWithNewSchema.write(protocol(in));

        //read using the schema that doesn't have the extra field
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        structForRead.readOne(protocol(new ByteArrayInputStream(in.toByteArray())), protocol(out));

        //record will be read without extra field
        assertEquals(1, countingHandler.recordCountOfMissingFields);
        assertEquals(1, countingHandler.fieldIgnoredCount);

        // Decode the copy with the newer class so the absence of the extra field can be observed.
        StructV4WithExtracStructField b = new StructV4WithExtracStructField();
        b.read(protocol(new ByteArrayInputStream(out.toByteArray())));
        assertEquals(dataWithNewSchema.getName(), b.getName());
        assertEquals(dataWithNewSchema.getAge(), b.getAge());
        assertEquals(dataWithNewSchema.getGender(), b.getGender());
        assertEquals(null, b.getAddedStruct());
    }

    /**
     * Reads data carrying an extra field with a schema built from
     * {@code StructWithIndexStartsFrom4}; the handler must be told exactly once that the field
     * with id 3 was ignored.
     *
     * <p>NOTE(review): the method name starts with an upper-case letter, against Java naming
     * conventions; it is kept as is so that name-based test selection keeps working.
     */
    @Test
    public void TestExtraFieldWhenFieldIndexIsNotStartFromZero() throws Exception {
        CountingErrorHandler countingHandler = new CountingErrorHandler() {
            @Override
            public void handleFieldIgnored(TField field) {
                assertEquals(3, field.id);
                fieldIgnoredCount++;
            }
        };

        BufferedProtocolReadToWrite structForRead = new BufferedProtocolReadToWrite(
                new ThriftSchemaConverter().toStructType(StructWithIndexStartsFrom4.class), countingHandler);

        //Data has an extra field of type struct
        final ByteArrayOutputStream in = new ByteArrayOutputStream();
        StructWithExtraField dataWithNewExtraField = new StructWithExtraField(new Phone("111", "222"),
                new Phone("333", "444"));
        dataWithNewExtraField.write(protocol(in));

        //read using the schema that doesn't have the extra field
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        structForRead.readOne(protocol(new ByteArrayInputStream(in.toByteArray())), protocol(out));

        assertEquals(1, countingHandler.recordCountOfMissingFields);
        assertEquals(1, countingHandler.fieldIgnoredCount);
    }

    /** Builds the {@code OneOfEach} fixture shared by the round-trip and schema-mismatch tests. */
    private static OneOfEach newOneOfEach() {
        return new OneOfEach(true, false, (byte) 8, (short) 16, 32, 64L, 1234d, "string",
                "", false, ByteBuffer.wrap("a".getBytes()), new ArrayList<Byte>(), new ArrayList<Short>(),
                new ArrayList<Long>());
    }

    /**
     * Pipes one record from {@code from} to {@code to} and requires the pipe to reject it.
     *
     * @param pipe the pipe under test
     * @param from stream positioned at the start of the record expected to be rejected
     * @param to   stream receiving whatever the pipe writes
     * @return the cause of the {@code SkippableException} raised by the pipe
     * @throws TException if the pipe fails with a thrift error instead
     */
    private Throwable readOneExpectingSkip(BufferedProtocolReadToWrite pipe, InputStream from, OutputStream to)
            throws TException {
        try {
            pipe.readOne(protocol(from), protocol(to));
        } catch (SkippableException e) {
            return e.getCause();
        }
        fail("this should throw");
        return null; // never reached: fail() always throws
    }

    /** Wraps {@code to} in a compact protocol used for writing. */
    private TCompactProtocol protocol(OutputStream to) {
        return new TCompactProtocol(new TIOStreamTransport(to));
    }

    /** Wraps {@code from} in a compact protocol used for reading. */
    private TCompactProtocol protocol(InputStream from) {
        return new TCompactProtocol(new TIOStreamTransport(from));
    }

    /**
     * Handler that counts how often each callback is invoked so tests can assert on the totals.
     * Declared static because it never touches the enclosing test instance.
     */
    static class CountingErrorHandler extends FieldIgnoredHandler {
        /** Number of {@link #handleFieldIgnored(TField)} invocations. */
        int fieldIgnoredCount = 0;
        /** Number of {@link #handleRecordHasFieldIgnored()} invocations. */
        int recordCountOfMissingFields = 0;

        @Override
        public void handleRecordHasFieldIgnored() {
            recordCountOfMissingFields++;
        }

        @Override
        public void handleFieldIgnored(TField field) {
            fieldIgnoredCount++;
        }
    }
}