org.apache.lens.lib.query.TestFilePersistentFormatter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lens.lib.query.TestFilePersistentFormatter.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.lens.lib.query;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.lens.server.api.LensConfConstants;
import org.apache.lens.server.api.query.PersistedOutputFormatter;
import org.apache.lens.server.api.query.QueryContext;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

import org.testng.Assert;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;

/**
 * The Class TestFilePersistentFormatter.
 */
public class TestFilePersistentFormatter extends TestAbstractFileFormatter {

    /**
     * The part file dir.
     */
    private Path partFileDir = new Path("target/partcsvfiles");

    /**
     * The part file text dir.
     */
    private Path partFileTextDir = new Path("target/parttextfiles");

    /**
     * Creates the part files.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @BeforeTest
    public void createPartFiles() throws IOException {

        // create csv files
        FileSystem fs = partFileDir.getFileSystem(new Configuration());
        BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(fs.create(new Path(partFileDir, "000000_2"))));
        writer.write("\"1\",\"one\",\"one\",\"one\",\"1\",\"1:one\",\"1=one\"\n");
        writer.close();
        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(partFileDir, "000001_0"))));
        writer.write("\"2\",\"two\",\"two\",\"two\",\"1,2\",\"2:two\",\"1=one,2=two\"\n");
        writer.write(
                "\"NULL\",\"three\",\"three\",\"three\",\"1,2,NULL\",\"NULL:three\",\"1=one,2=two,NULL=three\"\n");
        writer.close();
        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(partFileDir, "000010_1"))));
        writer.write(
                "\"4\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4\",\"4:NULL\",\"1=one,2=two,NULL=three,4=NULL\"\n");
        writer.write("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4,NULL\",\"NULL:NULL\","
                + "\"1=one,2=two,NULL=three,4=NULL,5=NULL\"\n");
        writer.close();
        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(partFileDir, "_SUCCESS"))));
        writer.close();

        // create text files
        fs = partFileTextDir.getFileSystem(new Configuration());
        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(partFileTextDir, "000000_2"))));
        writer.write("1oneoneone            11one1one       \n");
        writer.close();
        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(partFileTextDir, "000001_0"))));
        writer.write("2twotwotwo            122two1one       2two       \n");
        writer.write(
                "\\Nthreethreethree          12\\N\\Nthree1one       2two       \\Nthree     \n");
        writer.close();
        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(partFileTextDir, "000010_1"))));
        writer.write("4\\N\\N\\N12\\N44\\N1one       2two       \\Nthree     4\\N\n");
        writer.write(
                "\\N\\N\\N\\N12\\N4\\N\\N\\N1one       2two       \\Nthree     4\\N5\\N\n");
        writer.close();
        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(partFileTextDir, "_SUCCESS"))));
        writer.close();
    }

    /**
     * Cleanup part files.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @AfterTest
    public void cleanupPartFiles() throws IOException {
        FileSystem fs = partFileDir.getFileSystem(new Configuration());
        fs.delete(partFileDir, true);
        fs.delete(partFileTextDir, true);
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lens.lib.query.TestAbstractFileFormatter#createFormatter()
     */
    @Override
    protected WrappedFileFormatter createFormatter() {
        return new FilePersistentFormatter();
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lens.lib.query.TestAbstractFileFormatter#writeAllRows(org.apache.hadoop.conf.Configuration)
     */
    @Override
    protected void writeAllRows(Configuration conf) throws IOException {
        ((PersistedOutputFormatter) formatter).addRowsFromPersistedPath(new Path(conf.get("test.partfile.dir")));
    }

    protected void setConf(Configuration conf) {
        conf.set("test.partfile.dir", partFileDir.toString());
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER,
                "\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
        conf.set(LensConfConstants.QUERY_OUTPUT_FOOTER, "Total rows:5");
    }

    /**
     * Test csv with serde header.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testCSVWithSerdeHeader() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER, "");
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".csv", getMockedResultSet());
        // validate rows
        Assert.assertEquals(readFinalOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8"),
                getExpectedCSVRows());
    }

    /**
     * Test text files.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testTextFiles() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set("test.partfile.dir", partFileTextDir.toString());
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER,
                "firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt",
                getMockedResultSetWithoutComma());
        // validate rows
        Assert.assertEquals(readFinalOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8"),
                getExpectedTextRowsWithoutComma());
    }

    /**
     * Test text file with serde header.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testTextFileWithSerdeHeader() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set("test.partfile.dir", partFileTextDir.toString());
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER, "");
        conf.set(LensConfConstants.QUERY_OUTPUT_SERDE, LazySimpleSerDe.class.getCanonicalName());
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt",
                getMockedResultSetWithoutComma());
        // validate rows
        Assert.assertEquals(readFinalOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8"),
                getExpectedTextRowsWithoutComma());
    }

    /**
     * Test text files with compression.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testTextFilesWithCompression() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set("test.partfile.dir", partFileTextDir.toString());
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.setBoolean(LensConfConstants.QUERY_OUTPUT_ENABLE_COMPRESSION, true);
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER,
                "firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt.gz",
                getMockedResultSetWithoutComma());
        // validate rows
        Assert.assertEquals(readCompressedFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8"),
                getExpectedTextRows());
    }

    /**
     * Test text file with zip formatter.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testTextFileWithZipFormatter() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set("test.partfile.dir", partFileTextDir.toString());
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER, "");
        conf.set(LensConfConstants.QUERY_OUTPUT_SERDE, LazySimpleSerDe.class.getCanonicalName());
        conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
        conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip",
                getMockedResultSetWithoutComma());
        // validate rows
        List<String> actual = readZipOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8");
        System.out.println("Actual rows:" + actual);
        Assert.assertEquals(actual, getExpectedTextRowsWithMultipleWithoutComma());
    }

    /**
     * Test csv with zip formatter.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testCSVWithZipFormatter() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER, "");
        conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
        conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
        testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip", getMockedResultSet());
        // validate rows
        List<String> actual = readZipOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8");
        System.out.println("Actual rows:" + actual);
        Assert.assertEquals(actual, getExpectedCSVRowsWithMultiple());
    }

    /**
     * Test text files output path.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testTextFileOutputPath() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set("test.partfile.dir", partFileTextDir.toString());
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER,
                "firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
        QueryContext ctx = createContext(conf, "test.Query_1 name");
        Path expectedFinalPath = new Path(LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT,
                "test.Query_1_name-" + ctx.getQueryHandle() + ".txt");
        FileSystem fs = expectedFinalPath.getFileSystem(conf);
        expectedFinalPath = expectedFinalPath.makeQualified(fs);
        validateFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt",
                getMockedResultSetWithoutComma(), ctx, expectedFinalPath);
    }

    /**
     * Test text files with a long output path.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testTextFileLongOutputPath() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.set("test.partfile.dir", partFileTextDir.toString());
        conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
        conf.set(LensConfConstants.QUERY_OUTPUT_HEADER,
                "firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
        QueryContext ctx = createContext(conf,
                "test-Query 1^name12345678901234567890123456789012345678901234567890");
        Path expectedFinalPath = new Path(LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT,
                "test-Query_1_name123456789012345678901234567890123-" + ctx.getQueryHandle() + ".txt");
        FileSystem fs = expectedFinalPath.getFileSystem(conf);
        expectedFinalPath = expectedFinalPath.makeQualified(fs);
        validateFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt",
                getMockedResultSetWithoutComma(), ctx, expectedFinalPath);
    }

    /**
     * Test zip csv files output path.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testCSVZipFileOutputPath() throws IOException {
        Configuration conf = new Configuration();
        setConf(conf);
        conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
        conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
        QueryContext ctx = createContext(conf, "Test.query_1 name");
        Path expectedFinalPath = new Path(LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT,
                "Test.query_1_name-" + ctx.getQueryHandle() + ".zip");
        FileSystem fs = expectedFinalPath.getFileSystem(conf);
        expectedFinalPath = expectedFinalPath.makeQualified(fs);
        validateFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip",
                getMockedResultSetWithoutComma(), ctx, expectedFinalPath);
        ZipEntry ze = null;
        ZipInputStream zin = new ZipInputStream(fs.open(expectedFinalPath));
        int i = 0;
        while ((ze = zin.getNextEntry()) != null) {
            Assert.assertEquals(ze.getName(), "Test.query_1_name-" + ctx.getQueryHandle() + "_part-" + i + ".csv");
            i++;
            zin.closeEntry();
        }
        zin.close();
    }

    protected List<String> getExpectedCSVRows() {
        return new ArrayList<String>() {
            {
                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"1\",\"one\",\"one\",\"one\",\"1\",\"1:one\",\"1=one\"");
                add("\"2\",\"two\",\"two\",\"two\",\"1,2\",\"2:two\",\"1=one,2=two\"");
                add("\"NULL\",\"three\",\"three\",\"three\",\"1,2,NULL\",\"NULL:three\",\"1=one,2=two,NULL=three\"");
                add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4\",\"4:NULL\",\"1=one,2=two,NULL=three,4=NULL\"");
                add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4,NULL\",\"NULL:NULL\","
                        + "\"1=one,2=two,NULL=three,4=NULL,5=NULL\"");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedTextRows() {
        return new ArrayList<String>() {
            {
                add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
                add("1oneoneone            11one1one       ");
                add("2twotwotwo            122two1one       2two       ");
                add("\\Nthreethreethree          12\\N\\Nthree1one       2two       \\Nthree     ");
                add("4\\N\\N\\N12\\N44\\N1one       2two       \\Nthree     4\\N");
                add("\\N\\N\\N\\N12\\N4\\N\\N\\N1one       2two       \\Nthree     4\\N5\\N");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedCSVRowsWithoutComma() {
        return new ArrayList<String>() {
            {
                add("\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"1\",\"one\",\"one\",\"one\",\"1\",\"1:one\",\"1=one\"");
                add("\"2\",\"two\",\"two\",\"two\",\"1,2\",\"2:two\",\"1=one,2=two\"");
                add("\"NULL\",\"three\",\"three\",\"three\",\"1,2,NULL\",\"NULL:three\",\"1=one,2=two,NULL=three\"");
                add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4\",\"4:NULL\",\"1=one,2=two,NULL=three,4=NULL\"");
                add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4,NULL\",\"NULL:NULL\","
                        + "\"1=one,2=two,NULL=three,4=NULL,5=NULL\"");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedTextRowsWithoutComma() {
        return new ArrayList<String>() {
            {
                add("firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
                add("1oneoneone            11one1one       ");
                add("2twotwotwo            122two1one       2two       ");
                add("\\Nthreethreethree          12\\N\\Nthree1one       2two       \\Nthree     ");
                add("4\\N\\N\\N12\\N44\\N1one       2two       \\Nthree     4\\N");
                add("\\N\\N\\N\\N12\\N4\\N\\N\\N1one       2two       \\Nthree     4\\N5\\N");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedCSVRowsWithMultiple() {
        return new ArrayList<String>() {
            {

                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"1\",\"one\",\"one\",\"one\",\"1\",\"1:one\",\"1=one\"");
                add("\"2\",\"two\",\"two\",\"two\",\"1,2\",\"2:two\",\"1=one,2=two\"");
                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"NULL\",\"three\",\"three\",\"three\",\"1,2,NULL\",\"NULL:three\",\"1=one,2=two,NULL=three\"");
                add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4\",\"4:NULL\",\"1=one,2=two,NULL=three,4=NULL\"");
                add("\"firstcol\",\"format(secondcol,2)\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4,NULL\",\"NULL:NULL\","
                        + "\"1=one,2=two,NULL=three,4=NULL,5=NULL\"");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedTextRowsWithMultiple() {
        return new ArrayList<String>() {
            {
                add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
                add("1oneoneone            11one1one       ");
                add("2twotwotwo            122two1one       2two       ");
                add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
                add("\\Nthreethreethree          12\\N\\Nthree1one       2two       \\Nthree     ");
                add("4\\N\\N\\N12\\N44\\N1one       2two       \\Nthree     4\\N");
                add("firstcolformat(secondcol,2)thirdcolfourthcolfifthcolsixthcolseventhcol");
                add("\\N\\N\\N\\N12\\N4\\N\\N\\N1one       2two       \\Nthree     4\\N5\\N");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedCSVRowsWithMultipleWithoutComma() {
        return new ArrayList<String>() {
            {
                add("\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"1\",\"one\",\"one\",\"one\",\"1\",\"1:one\",\"1=one\"");
                add("\"2\",\"two\",\"two\",\"two\",\"1,2\",\"2:two\",\"1=one,2=two\"");
                add("\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"NULL\",\"three\",\"three\",\"three\",\"1,2,NULL\",\"NULL:three\",\"1=one,2=two,NULL=three\"");
                add("\"4\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4\",\"4:NULL\",\"1=one,2=two,NULL=three,4=NULL\"");
                add("\"firstcol\",\"secondcol\",\"thirdcol\",\"fourthcol\",\"fifthcol\",\"sixthcol\",\"seventhcol\"");
                add("\"NULL\",\"NULL\",\"NULL\",\"NULL\",\"1,2,NULL,4,NULL\",\"NULL:NULL\","
                        + "\"1=one,2=two,NULL=three,4=NULL,5=NULL\"");
                add("Total rows:5");
            }
        };
    }

    protected List<String> getExpectedTextRowsWithMultipleWithoutComma() {
        return new ArrayList<String>() {
            {
                add("firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
                add("1oneoneone            11one1one       ");
                add("2twotwotwo            122two1one       2two       ");
                add("firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
                add("\\Nthreethreethree          12\\N\\Nthree1one       2two       \\Nthree     ");
                add("4\\N\\N\\N12\\N44\\N1one       2two       \\Nthree     4\\N");
                add("firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
                add("\\N\\N\\N\\N12\\N4\\N\\N\\N1one       2two       \\Nthree     4\\N5\\N");
                add("Total rows:5");
            }
        };
    }
}