org.apache.lens.lib.query.TestFileSerdeFormatter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lens.lib.query.TestFileSerdeFormatter.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.lens.lib.query;

import java.io.IOException;
import java.util.*;

import org.apache.lens.api.query.ResultRow;
import org.apache.lens.server.api.LensConfConstants;
import org.apache.lens.server.api.query.InMemoryOutputFormatter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.io.Text;

import org.testng.Assert;
import org.testng.annotations.Test;

/**
 * The Class TestFileSerdeFormatter.
 */
public class TestFileSerdeFormatter extends TestAbstractFileFormatter {

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lens.lib.query.TestAbstractFileFormatter#testFormatter()
     */
    @Test
    public void testFormatter() throws IOException {
        super.testFormatter();
        validateSerde(LensConfConstants.DEFAULT_OUTPUT_SERDE, Text.class.getCanonicalName());
    }

    /**
     * Test serde.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testSerde() throws IOException {
        Configuration conf = new Configuration();
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_SERDE, LazySimpleSerDe.class.getCanonicalName());
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt",
                getMockedResultSetWithoutComma());
        validateSerde(LazySimpleSerDe.class.getCanonicalName(), Text.class.getCanonicalName());

        // validate rows
        Assert.assertEquals(readFinalOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8"),
                getExpectedTextRowsWithoutComma());
    }

    /**
     * Test compression with custom serde.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testCompressionWithCustomSerde() throws IOException {
        Configuration conf = new Configuration();
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_SERDE, LazySimpleSerDe.class.getCanonicalName());
        conf.setBoolean(LensConfConstants.QUERY_OUTPUT_ENABLE_COMPRESSION, true);
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt.gz",
                getMockedResultSetWithoutComma());
        validateSerde(LazySimpleSerDe.class.getCanonicalName(), Text.class.getCanonicalName());
        // validate rows
        Assert.assertEquals(readCompressedFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8"),
                getExpectedTextRowsWithoutComma());
    }

    /**
     * Test text file with zip formatter.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testTextFileWithZipFormatter() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_SERDE, LazySimpleSerDe.class.getCanonicalName());
        conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
        conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip",
                getMockedResultSetWithoutComma());
        // validate rows
        List<String> actual = readZipOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8");
        Assert.assertEquals(actual, getExpectedTextRowsWithMultipleWithoutComma());
    }

    /**
     * Test csv with zip formatter.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testCSVWithZipFormatter() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
        conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip", getMockedResultSet());
        // validate rows
        List<String> actual = readZipOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8");
        Assert.assertEquals(actual, getExpectedCSVRowsWithMultiple());
    }

    /**
     * Validate serde.
     *
     * @param serdeClassName      the serde class name
     * @param serializedClassName the serialized class name
     */
    private void validateSerde(String serdeClassName, String serializedClassName) {
        // check serde
        SerDe outputSerde = ((FileSerdeFormatter) formatter).getSerde();
        Assert.assertEquals(serdeClassName, outputSerde.getClass().getCanonicalName());
        Assert.assertEquals(serializedClassName, outputSerde.getSerializedClass().getCanonicalName());

    }

    private List<ResultRow> getTestRows() {
        List<ResultRow> rows = new ArrayList<ResultRow>();
        List<Object> elements = new ArrayList<Object>();
        Map<Integer, String> mapElements = new LinkedHashMap<Integer, String>();
        mapElements.put(1, "one");
        elements.add(1);
        elements.add("one");
        elements.add("one");
        elements.add("one");
        elements.add(Arrays.asList(new Byte((byte) 1)));
        elements.add(Arrays.asList(1, "one"));
        elements.add(mapElements);
        rows.add(new ResultRow(elements));

        mapElements = new LinkedHashMap<Integer, String>();
        mapElements.put(1, "one");
        mapElements.put(2, "two, 3=three");
        elements = new ArrayList<Object>();
        elements.add(2);
        elements.add("two");
        elements.add("two");
        elements.add("two");
        elements.add(Arrays.asList(new Byte((byte) 1), new Byte((byte) 2)));
        elements.add(Arrays.asList(2, "two"));
        elements.add(mapElements);
        rows.add(new ResultRow(elements));

        mapElements = new LinkedHashMap<Integer, String>();
        mapElements.put(1, "one");
        mapElements.put(2, "two");
        mapElements.put(null, "three");
        elements = new ArrayList<Object>();
        elements.add(null);
        elements.add("three");
        elements.add("three");
        elements.add("three");
        elements.add(Arrays.asList(new Byte((byte) 1), new Byte((byte) 2), null));
        elements.add(Arrays.asList(null, "three"));
        elements.add(mapElements);
        rows.add(new ResultRow(elements));

        mapElements = new LinkedHashMap<Integer, String>();
        mapElements.put(1, "one");
        mapElements.put(2, "two");
        mapElements.put(null, "three");
        mapElements.put(4, null);
        elements = new ArrayList<Object>();
        elements.add(4);
        elements.add(null);
        elements.add(null);
        elements.add(null);
        elements.add(Arrays.asList(new Byte((byte) 1), new Byte((byte) 2), null, new Byte((byte) 4)));
        elements.add(Arrays.asList(4, null));
        elements.add(mapElements);
        rows.add(new ResultRow(elements));

        mapElements = new LinkedHashMap<Integer, String>();
        mapElements.put(1, "one");
        mapElements.put(2, "two");
        mapElements.put(null, "three");
        mapElements.put(4, null);
        mapElements.put(5, null);
        elements = new ArrayList<Object>();
        elements.add(null);
        elements.add(null);
        elements.add(null);
        elements.add(null);
        elements.add(Arrays.asList(new Byte((byte) 1), new Byte((byte) 2), null, new Byte((byte) 4), null));
        elements.add(Arrays.asList(null, null));
        elements.add(mapElements);
        rows.add(new ResultRow(elements));

        return rows;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lens.lib.query.TestAbstractFileFormatter#createFormatter()
     */
    @Override
    protected WrappedFileFormatter createFormatter() {
        return new FileSerdeFormatter();
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lens.lib.query.TestAbstractFileFormatter#writeAllRows(org.apache.hadoop.conf.Configuration)
     */
    @Override
    protected void writeAllRows(Configuration conf) throws IOException {
        for (ResultRow row : getTestRows()) {
            ((InMemoryOutputFormatter) formatter).writeRow(row);
        }
    }

    protected List<String> getExpectedCSVRows() {
        return new ArrayList<String>() {
            {
                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"1\",\"one\",\"one\",\"one\",\"[1]\",\"[1, one]\",\"{1=one}\"");
                add("\"2\",\"two\",\"two\",\"two\",\"[1, 2]\",\"[2, two]\",\"{1=one, 2=two, 3=three}\"");
                add("\"NULL\",\"three\",\"three\",\"three\",\"[1, 2, null]\",\"[null, three]\",\"{1=one, 2=two, null=three}\"");
                add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"[1, 2, null, 4]\",\"[4, null]\","
                        + "\"{1=one, 2=two, null=three, 4=null}\"");
                add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"[1, 2, null, 4, null]\",\"[null, null]\","
                        + "\"{1=one, 2=two, null=three, 4=null, 5=null}\"");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedTextRows() {
        List<String> txtRows = new ArrayList<String>();
        txtRows.add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
        txtRows.add("1oneoneone            11one1one       ");
        txtRows.add("2twotwotwo            122two1one       2two       ");
        txtRows.add("\\Nthreethreethree          12\\N\\Nthree1one       2two       \\Nthree     ");
        txtRows.add("4\\N\\N\\N12\\N44\\N1one       2two       \\Nthree     4\\N");
        txtRows.add("\\N\\N\\N\\N12\\N4\\N\\N\\N1one       2two       \\Nthree     4\\N5\\N");
        txtRows.add("Total rows:5");
        return txtRows;
    }

    protected List<String> getExpectedCSVRowsWithoutComma() {
        List<String> csvRows = new ArrayList<String>();
        csvRows.add(
                "\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
        csvRows.add("\"1\",\"one\",\"one\",\"one\",\"1\",\"1:one\",\"1=one\"");
        csvRows.add("\"2\",\"two\",\"two\",\"two\",\"1,2\",\"2:two\",\"1=one,2=two\"");
        csvRows.add(
                "\"NULL\",\"three\",\"three\",\"three\",\"1,2,NULL\",\"NULL:three\",\"1=one,2=two,NULL=three\"");
        csvRows.add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4\",\"4:NULL\",\"1=one,2=two,NULL=three,4=NULL\"");
        csvRows.add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4,NULL\",\"NULL:NULL\","
                + "\"1=one,2=two,NULL=three,4=NULL,5=NULL\"");
        csvRows.add("Total rows:5");
        return csvRows;
    }

    protected List<String> getExpectedTextRowsWithoutComma() {
        return new ArrayList<String>() {
            {
                add("firstcol\u0001secondcol\u0001thirdcol\u0001fourthcol\u0001fifthcol\u0001sixthcol\u0001seventhcol");
                add("1\u0001one\u0001one\u0001one            \u0001[1]\u0001[1, one]\u0001{1=one}");
                add("2\u0001two\u0001two\u0001two            \u0001[1, 2]\u0001[2, two]\u0001{1=one, 2=two, 3=three}");
                add("\\N\u0001three\u0001three\u0001three          \u0001[1, 2, null]\u0001"
                        + "[null, three]\u0001{1=one, 2=two, null=three}");
                add("4\u0001\\N\u0001\\N\u0001\\N\u0001[1, 2, null, 4]\u0001[4, null]\u0001{1=one, 2=two, null=three, 4=null}");
                add("\\N\u0001\\N\u0001\\N\u0001\\N\u0001[1, 2, null, 4, null]\u0001[null, null]"
                        + "\u0001{1=one, 2=two, null=three, 4=null, 5=null}");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedCSVRowsWithMultiple() {
        return new ArrayList<String>() {
            {
                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"1\",\"one\",\"one\",\"one\",\"[1]\",\"[1, one]\",\"{1=one}\"");
                add("\"2\",\"two\",\"two\",\"two\",\"[1, 2]\",\"[2, two]\",\"{1=one, 2=two, 3=three}\"");
                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"NULL\",\"three\",\"three\",\"three\",\"[1, 2, null]\",\"[null, three]\",\"{1=one, 2=two, null=three}\"");
                add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"[1, 2, null, 4]\",\"[4, null]\","
                        + "\"{1=one, 2=two, null=three, 4=null}\"");
                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"[1, 2, null, 4, null]\","
                        + "\"[null, null]\",\"{1=one, 2=two, null=three, 4=null, 5=null}\"");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedTextRowsWithMultiple() {
        List<String> txtRows = new ArrayList<String>();
        txtRows.add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
        txtRows.add("1oneoneone            11one1one       ");
        txtRows.add("2twotwotwo            122two1one       2two       ");
        txtRows.add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
        txtRows.add("\\Nthreethreethree          12\\N\\Nthree1one       2two       \\Nthree     ");
        txtRows.add("4\\N\\N\\N12\\N44\\N1one       2two       \\Nthree     4\\N");
        txtRows.add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
        txtRows.add("\\N\\N\\N\\N12\\N4\\N\\N\\N1one       2two       \\Nthree     4\\N5\\N");
        txtRows.add("Total rows:5");
        return txtRows;
    }

    protected List<String> getExpectedCSVRowsWithMultipleWithoutComma() {
        List<String> csvRows = new ArrayList<String>();
        csvRows.add(
                "\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
        csvRows.add("\"1\",\"one\",\"one\",\"one\",\"1\",\"1:one\",\"1=one\"");
        csvRows.add("\"2\",\"two\",\"two\",\"two\",\"1,2\",\"2:two\",\"1=one,2=two\"");
        csvRows.add(
                "\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
        csvRows.add(
                "\"NULL\",\"three\",\"three\",\"three\",\"1,2,NULL\",\"NULL:three\",\"1=one,2=two,NULL=three\"");
        csvRows.add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4\",\"4:NULL\",\"1=one,2=two,NULL=three,4=NULL\"");
        csvRows.add(
                "\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
        csvRows.add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4,NULL\",\"NULL:NULL\","
                + "\"1=one,2=two,NULL=three,4=NULL,5=NULL\"");
        csvRows.add("Total rows:5");
        return csvRows;
    }

    protected List<String> getExpectedTextRowsWithMultipleWithoutComma() {
        return new ArrayList<String>() {
            {
                add("firstcol\u0001secondcol\u0001thirdcol\u0001fourthcol\u0001fifthcol\u0001sixthcol\u0001seventhcol");
                add("1\u0001one\u0001one\u0001one            \u0001[1]\u0001[1, one]\u0001{1=one}");
                add("2\u0001two\u0001two\u0001two            \u0001[1, 2]\u0001[2, two]\u0001{1=one, 2=two, 3=three}");
                add("firstcol\u0001secondcol\u0001thirdcol\u0001fourthcol\u0001fifthcol\u0001sixthcol\u0001seventhcol");
                add("\\N\u0001three\u0001three\u0001three          \u0001[1, 2, null]"
                        + "\u0001[null, three]\u0001{1=one, 2=two, null=three}");
                add("4\u0001\\N\u0001\\N\u0001\\N\u0001[1, 2, null, 4]\u0001[4, null]\u0001{1=one, 2=two, null=three, 4=null}");
                add("firstcol\u0001secondcol\u0001thirdcol\u0001fourthcol\u0001fifthcol\u0001sixthcol\u0001seventhcol");
                add("\\N\u0001\\N\u0001\\N\u0001\\N\u0001[1, 2, null, 4, null]"
                        + "\u0001[null, null]\u0001{1=one, 2=two, null=three, 4=null, 5=null}");
                add("Total rows:5");
            }
        };
    }

}