org.apache.hive.hcatalog.mapreduce.TestHCatPartitioned.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hive.hcatalog.mapreduce.TestHCatPartitioned.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hive.hcatalog.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;
import org.junit.Test;

import static junit.framework.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;

public class TestHCatPartitioned extends HCatMapReduceTest {

    private static List<HCatRecord> writeRecords;
    private static List<HCatFieldSchema> partitionColumns;

    public TestHCatPartitioned(String formatName, String serdeClass, String inputFormatClass,
            String outputFormatClass) throws Exception {
        super(formatName, serdeClass, inputFormatClass, outputFormatClass);
        tableName = "testHCatPartitionedTable_" + formatName;
        writeRecords = new ArrayList<HCatRecord>();

        for (int i = 0; i < 20; i++) {
            List<Object> objList = new ArrayList<Object>();

            objList.add(i);
            objList.add("strvalue" + i);
            writeRecords.add(new DefaultHCatRecord(objList));
        }

        partitionColumns = new ArrayList<HCatFieldSchema>();
        partitionColumns
                .add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    }

    @Override
    protected List<FieldSchema> getPartitionKeys() {
        List<FieldSchema> fields = new ArrayList<FieldSchema>();
        //Defining partition names in unsorted order
        fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, ""));
        fields.add(new FieldSchema("part0", serdeConstants.INT_TYPE_NAME, ""));
        return fields;
    }

    @Override
    protected List<FieldSchema> getTableColumns() {
        List<FieldSchema> fields = new ArrayList<FieldSchema>();
        fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""));
        fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""));
        return fields;
    }

    @Test
    public void testHCatPartitionedTable() throws Exception {

        Map<String, String> partitionMap = new HashMap<String, String>();
        partitionMap.put("part1", "p1value1");
        partitionMap.put("part0", "501");

        runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

        partitionMap.clear();
        partitionMap.put("PART1", "p1value2");
        partitionMap.put("PART0", "502");

        runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);

        //Test for duplicate publish -- this will either fail on job creation time
        // and throw an exception, or will fail at runtime, and fail the job.

        IOException exc = null;
        try {
            Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
            assertEquals(!isTableImmutable(), j.isSuccessful());
        } catch (IOException e) {
            exc = e;
            assertTrue(exc instanceof HCatException);
            assertTrue(ErrorType.ERROR_DUPLICATE_PARTITION.equals(((HCatException) exc).getErrorType()));
        }
        if (!isTableImmutable()) {
            assertNull(exc);
        }

        //Test for publish with invalid partition key name
        exc = null;
        partitionMap.clear();
        partitionMap.put("px1", "p1value2");
        partitionMap.put("px0", "502");

        try {
            Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
            assertFalse(j.isSuccessful());
        } catch (IOException e) {
            exc = e;
            assertNotNull(exc);
            assertTrue(exc instanceof HCatException);
            assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType());
        }

        //Test for publish with missing partition key values
        exc = null;
        partitionMap.clear();
        partitionMap.put("px", "512");

        try {
            runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
        } catch (IOException e) {
            exc = e;
        }

        assertNotNull(exc);
        assertTrue(exc instanceof HCatException);
        assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType());

        //Test for null partition value map
        exc = null;
        try {
            runMRCreate(null, partitionColumns, writeRecords, 20, false);
        } catch (IOException e) {
            exc = e;
        }

        assertTrue(exc == null);
        //    assertTrue(exc instanceof HCatException);
        //    assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType());
        // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values

        //Read should get 10 + 20 rows if immutable, 50 (10+20+20) if mutable
        if (isTableImmutable()) {
            runMRRead(30);
        } else {
            runMRRead(50);
        }

        //Read with partition filter
        runMRRead(10, "part1 = \"p1value1\"");
        runMRRead(10, "part0 = \"501\"");
        if (isTableImmutable()) {
            runMRRead(20, "part1 = \"p1value2\"");
            runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\"");
            runMRRead(20, "part0 = \"502\"");
            runMRRead(30, "part0 = \"501\" or part0 = \"502\"");
        } else {
            runMRRead(40, "part1 = \"p1value2\"");
            runMRRead(50, "part1 = \"p1value1\" or part1 = \"p1value2\"");
            runMRRead(40, "part0 = \"502\"");
            runMRRead(50, "part0 = \"501\" or part0 = \"502\"");
        }

        tableSchemaTest();
        columnOrderChangeTest();
        hiveReadTest();
    }

    //test that new columns gets added to table schema
    private void tableSchemaTest() throws Exception {

        HCatSchema tableSchema = getTableSchema();

        assertEquals(4, tableSchema.getFields().size());

        //Update partition schema to have 3 fields
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));

        writeRecords = new ArrayList<HCatRecord>();

        for (int i = 0; i < 20; i++) {
            List<Object> objList = new ArrayList<Object>();

            objList.add(i);
            objList.add("strvalue" + i);
            objList.add("str2value" + i);

            writeRecords.add(new DefaultHCatRecord(objList));
        }

        Map<String, String> partitionMap = new HashMap<String, String>();
        partitionMap.put("part1", "p1value5");
        partitionMap.put("part0", "505");

        runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

        tableSchema = getTableSchema();

        //assert that c3 has got added to table schema
        assertEquals(5, tableSchema.getFields().size());
        assertEquals("c1", tableSchema.getFields().get(0).getName());
        assertEquals("c2", tableSchema.getFields().get(1).getName());
        assertEquals("c3", tableSchema.getFields().get(2).getName());
        assertEquals("part1", tableSchema.getFields().get(3).getName());
        assertEquals("part0", tableSchema.getFields().get(4).getName());

        //Test that changing column data type fails
        partitionMap.clear();
        partitionMap.put("part1", "p1value6");
        partitionMap.put("part0", "506");

        partitionColumns = new ArrayList<HCatFieldSchema>();
        partitionColumns
                .add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
        partitionColumns
                .add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));

        IOException exc = null;
        try {
            runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
        } catch (IOException e) {
            exc = e;
        }

        assertTrue(exc != null);
        assertTrue(exc instanceof HCatException);
        assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());

        //Test that partition key is not allowed in data
        partitionColumns = new ArrayList<HCatFieldSchema>();
        partitionColumns
                .add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));

        List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
        for (int i = 0; i < 20; i++) {
            List<Object> objList = new ArrayList<Object>();

            objList.add(i);
            objList.add("c2value" + i);
            objList.add("c3value" + i);
            objList.add("p1value6");

            recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
        }

        exc = null;
        try {
            runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);
        } catch (IOException e) {
            exc = e;
        }

        List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
        assertEquals(20, records.size());
        records = runMRRead(20, "part0 = \"506\"");
        assertEquals(20, records.size());
        Integer i = 0;
        for (HCatRecord rec : records) {
            assertEquals(5, rec.size());
            assertEquals(rec.get(0), i);
            assertEquals(rec.get(1), "c2value" + i);
            assertEquals(rec.get(2), "c3value" + i);
            assertEquals(rec.get(3), "p1value6");
            assertEquals(rec.get(4), 506);
            i++;
        }
    }

    //check behavior while change the order of columns
    private void columnOrderChangeTest() throws Exception {

        HCatSchema tableSchema = getTableSchema();

        assertEquals(5, tableSchema.getFields().size());

        partitionColumns = new ArrayList<HCatFieldSchema>();
        partitionColumns
                .add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));

        writeRecords = new ArrayList<HCatRecord>();

        for (int i = 0; i < 10; i++) {
            List<Object> objList = new ArrayList<Object>();

            objList.add(i);
            objList.add("co strvalue" + i);
            objList.add("co str2value" + i);

            writeRecords.add(new DefaultHCatRecord(objList));
        }

        Map<String, String> partitionMap = new HashMap<String, String>();
        partitionMap.put("part1", "p1value8");
        partitionMap.put("part0", "508");

        Exception exc = null;
        try {
            runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
        } catch (IOException e) {
            exc = e;
        }

        assertTrue(exc != null);
        assertTrue(exc instanceof HCatException);
        assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());

        partitionColumns = new ArrayList<HCatFieldSchema>();
        partitionColumns
                .add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
        partitionColumns.add(
                HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));

        writeRecords = new ArrayList<HCatRecord>();

        for (int i = 0; i < 10; i++) {
            List<Object> objList = new ArrayList<Object>();

            objList.add(i);
            objList.add("co strvalue" + i);

            writeRecords.add(new DefaultHCatRecord(objList));
        }

        runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

        if (isTableImmutable()) {
            //Read should get 10 + 20 + 10 + 10 + 20 rows
            runMRRead(70);
        } else {
            runMRRead(90); // +20 from the duplicate publish
        }
    }

    //Test that data inserted through hcatoutputformat is readable from hive
    private void hiveReadTest() throws Exception {

        String query = "select * from " + tableName;
        int retCode = driver.run(query).getResponseCode();

        if (retCode != 0) {
            throw new Exception("Error " + retCode + " running query " + query);
        }

        ArrayList<String> res = new ArrayList<String>();
        driver.getResults(res);
        if (isTableImmutable()) {
            //Read should get 10 + 20 + 10 + 10 + 20 rows
            assertEquals(70, res.size());
        } else {
            assertEquals(90, res.size()); // +20 from the duplicate publish
        }

    }
}