org.apache.carbondata.sdk.file.CSVCarbonWriterTest.java Source code

Java tutorial

Introduction

Here is the source code for CSVCarbonWriterTest.java, a JUnit test suite in the org.apache.carbondata.sdk.file package.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.carbondata.sdk.file;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.FileReader;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.StructField;
import org.apache.carbondata.core.metadata.schema.SchemaReader;
import org.apache.carbondata.core.metadata.schema.table.TableInfo;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.reader.CarbonFooterReaderV3;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.format.FileFooter3;

import org.apache.commons.io.FileUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * Test suite for {@link CSVCarbonWriter}
 */
public class CSVCarbonWriterTest {

    /**
     * Runs before every test: points the Carbon system folder at the parent of the test
     * classpath root, sets the detail-query batch size to its default value and requires
     * {@code TestUtil.cleanMdtFile()} to report success.
     */
    @Before
    public void cleanFile() {
        String path;
        try {
            path = new File(CSVCarbonWriterTest.class.getResource("/").getPath() + "../").getCanonicalPath()
                    .replaceAll("\\\\", "/");
        } catch (IOException e) {
            // The `assert` keyword is a no-op unless the JVM runs with -ea, which would leave
            // the path null; fail unconditionally and keep the cause.
            throw new AssertionError("Unable to resolve the carbon system folder location", e);
        }
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_SYSTEM_FOLDER_LOCATION, path)
                .addProperty(CarbonCommonConstants.DETAIL_QUERY_BATCH_SIZE,
                        String.valueOf(CarbonCommonConstants.DETAIL_QUERY_BATCH_SIZE_DEFAULT));
        // JUnit assertion so that cleanMdtFile() is always invoked, even with JVM assertions disabled.
        Assert.assertTrue("TestUtil.cleanMdtFile() reported failure", TestUtil.cleanMdtFile());
    }

    /**
     * Runs after every test and fails it when {@code TestUtil.verifyMdtFile()} returns {@code true}.
     */
    @After
    public void verifyDMFile() {
        // JUnit assertion instead of the `assert` keyword, which is skipped unless the JVM runs with -ea.
        Assert.assertFalse("TestUtil.verifyMdtFile() unexpectedly returned true", TestUtil.verifyMdtFile());
    }

    /**
     * Writes a two-column table (name STRING, age INT) via {@code TestUtil.writeFilesAndVerify}.
     *
     * @throws IOException if the output directory cannot be deleted or writing fails
     */
    @Test
    public void testWriteFiles() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        try {
            TestUtil.writeFilesAndVerify(new Schema(fields), path);
        } finally {
            // Remove the output even when verification fails, as the other tests in this class do.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes files using a schema parsed from a JSON string (name, age, height) and verifies
     * them via {@code TestUtil.writeFilesAndVerify}.
     *
     * @throws IOException if the output directory cannot be deleted or writing fails
     */
    @Test
    public void testWriteFilesJsonSchema() throws IOException {
        String path = "./testWriteFilesJsonSchema";
        FileUtils.deleteDirectory(new File(path));

        // Compile-time constant; no StringBuilder needed for a fixed literal.
        String schema = "[ \n"
                + "   {\"name\":\"string\"},\n"
                + "   {\"age\":\"int\"},\n"
                + "   {\"height\":\"double\"}\n"
                + "]";

        try {
            TestUtil.writeFilesAndVerify(Schema.parseJson(schema), path);
        } finally {
            // Remove the output even when verification fails.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Builds a CSV writer from a compact JSON schema string, writes ten rows, then reads them
     * back with {@link CarbonReader} and checks the first column of each row and the row count.
     *
     * @throws IOException                if writing, reading or directory cleanup fails
     * @throws InvalidLoadOptionException if the writer cannot be built
     * @throws InterruptedException       if building the reader is interrupted
     */
    @Test
    public void testWriteFilesBuildWithJsonSchema()
            throws IOException, InvalidLoadOptionException, InterruptedException {
        String path = "./testWriteFilesJsonSchema";
        FileUtils.deleteDirectory(new File(path));

        String schema = "[{name:string},{age:int},{height:double}]";
        int rowCount = 10;
        try {
            CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(schema)
                    .writtenBy("testWriteFilesBuildWithJsonSchema").build();
            for (int i = 0; i < rowCount; i++) {
                writer.write(new String[] { "robot" + (i % 10), String.valueOf(i % 3000000),
                        String.valueOf((double) i / 2) });
            }
            writer.close();

            CarbonReader carbonReader = CarbonReader.builder(path).build();
            int rowsRead = 0;
            try {
                while (carbonReader.hasNext()) {
                    Object[] row = (Object[]) carbonReader.readNextRow();
                    // JUnit convention: expected value first, actual value second.
                    Assert.assertEquals("robot" + rowsRead % 10, row[0]);
                    rowsRead++;
                }
            } finally {
                carbonReader.close();
            }
            // Without this check the test would pass even if the reader returned no rows.
            Assert.assertEquals(rowCount, rowsRead);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes 100 rows covering string, int, short, long, double, boolean, date, timestamp and
     * decimal(8,2) columns and checks that at least one fact file was produced.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testAllPrimitiveDataType() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[9];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("intField", DataTypes.INT);
        fields[2] = new Field("shortField", DataTypes.SHORT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));

        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();

            for (int i = 0; i < 100; i++) {
                Object[] row = new Object[] { "robot" + (i % 10), i, i, (Long.MAX_VALUE - i), ((double) i / 2),
                        true, "2019-03-02", "2019-02-12 03:03:34", "1.234567" };
                writer.write(row);
            }
            writer.close();

            File[] dataFiles = new File(path).listFiles(new FileFilter() {
                @Override
                public boolean accept(File pathname) {
                    return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
                }
            });
            Assert.assertNotNull(dataFiles);
            Assert.assertTrue(dataFiles.length > 0);
        } catch (Exception e) {
            // Keep the original exception as the cause instead of only its message.
            throw new AssertionError("Writing rows of all primitive data types failed", e);
        } finally {
            // Clean up even when the write or the assertions fail.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes one million two-column rows via {@code TestUtil.writeFilesAndVerify}, passing
     * {@code 1} and {@code 100} as its last two arguments (presumably blocklet and block
     * size; confirm against TestUtil).
     *
     * @throws IOException if the output directory cannot be deleted or writing fails
     */
    @Test
    public void test2Blocklet() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        try {
            TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, 1, 100);

            // TODO: implement reader to verify the number of blocklet in the file
        } finally {
            // Remove the output even when verification fails.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes one million two-column rows via {@code TestUtil.writeFilesAndVerify} and checks
     * that exactly two fact files are produced in the output directory.
     *
     * @throws IOException if the output directory cannot be deleted or writing fails
     */
    @Test
    public void test2Block() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        try {
            TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, 2, 2);
            File[] dataFiles = new File(path).listFiles(new FileFilter() {
                @Override
                public boolean accept(File pathname) {
                    return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
                }
            });
            Assert.assertNotNull(dataFiles);
            Assert.assertEquals(2, dataFiles.length);
        } finally {
            // Remove the output even when an assertion fails.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes a two-column table via {@code TestUtil.writeFilesAndVerify}, passing
     * {@code "name"} as the sort-column array.
     *
     * @throws IOException if the output directory cannot be deleted or writing fails
     */
    @Test
    public void testSortColumns() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        try {
            TestUtil.writeFilesAndVerify(new Schema(fields), path, new String[] { "name" });

            // TODO: implement reader and verify the data is sorted
        } finally {
            // Remove the output even when verification fails.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Placeholder for a partitioned-write test. The body is empty, so this test currently
     * passes without exercising anything.
     */
    @Test
    public void testPartitionOutput() {
        // TODO: test write data with partition
    }

    /**
     * Expects an {@link IOException} when a raw comma-separated {@code String} is handed to a
     * writer built for CSV input (presumably {@code write} rejects it; which call throws is
     * not visible from this test).
     *
     * @throws IOException the expected failure
     */
    @Test(expected = IOException.class)
    public void testWhenWriterthrowsError() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        CarbonWriter carbonWriter;
        try {
            carbonWriter = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields))
                    .writtenBy("CSVCarbonWriterTest").build();
        } catch (InvalidLoadOptionException e) {
            // Building the writer is not the behaviour under test; fail and keep the cause.
            throw new AssertionError("Building the writer should not fail", e);
        }

        try {
            carbonWriter.write("babu,1");
            carbonWriter.close();
        } finally {
            // The expected exception skips everything after it, so clean up here. The quiet
            // variant is used so cleanup can never be the IOException that satisfies the test.
            FileUtils.deleteQuietly(new File(path));
        }
    }

    /**
     * Builds a writer from a three-slot {@code Field} array in which only two slots are
     * populated, then writes a single two-value row. The test passes when nothing throws.
     *
     * @throws IOException if writing, closing or directory cleanup fails
     */
    @Test
    public void testWrongSchemaFieldsValidation() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[3]; // supply 3 size fields but actual Field array value given is 2
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        try {
            CarbonWriter carbonWriter;
            try {
                carbonWriter = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields))
                        .writtenBy("CSVCarbonWriterTest").build();
            } catch (InvalidLoadOptionException e) {
                // Fail with the cause attached instead of a bare assertTrue(false).
                throw new AssertionError("Building the writer should not fail", e);
            }
            carbonWriter.write(new String[] { "babu", "1" });
            carbonWriter.close();
        } finally {
            // The original never removed the output directory; always clean up.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes two rows with {@code taskNo(5)} and checks that the task id extracted from the
     * first fact file's name equals 5.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testTaskNo() throws IOException {
        // TODO: write all data type and read by CarbonRecordReader to verify the content
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("intField", DataTypes.INT);

        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);

            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();

            for (int i = 0; i < 2; i++) {
                String[] row = new String[] { "robot" + (i % 10), String.valueOf(i) };
                writer.write(row);
            }
            writer.close();

            File[] dataFiles = new File(path).listFiles(new FileFilter() {
                @Override
                public boolean accept(File pathname) {
                    return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
                }
            });
            Assert.assertNotNull(dataFiles);
            Assert.assertTrue(dataFiles.length > 0);
            String taskNo = CarbonTablePath.DataFileUtil.getTaskNo(dataFiles[0].getName());
            long taskID = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(taskNo);
            // Expected value first, so a failure reports "expected:<5> but was:<actual>".
            Assert.assertEquals("Task Id is not matched", 5, taskID);
        } catch (Exception e) {
            // Keep the original exception as the cause instead of only its message.
            throw new AssertionError("Writing with an explicit task number failed", e);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Validates the number of blocklets in one block: writes one million rows sorted on
     * {@code name}, then reads the V3 footer of every carbondata file and expects two
     * entries in both the blocklet index list and the blocklet info list.
     *
     * @throws IOException if writing, footer reading or directory cleanup fails
     */
    @Test
    public void testBlocklet() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        try {
            TestUtil.writeFilesAndVerify(1000000, new Schema(fields), path, new String[] { "name" }, 3, 8);

            // read footer and verify number of blocklets
            CarbonFile folder = FileFactory.getCarbonFile(path);
            List<CarbonFile> dataFiles = new ArrayList<>();
            for (CarbonFile file : folder.listFiles(true)) {
                if (file.getName().endsWith(CarbonTablePath.CARBON_DATA_EXT)) {
                    dataFiles.add(file);
                }
            }
            // Guard against a vacuous pass: the footer loop below checks nothing on an empty list.
            Assert.assertFalse("No carbondata file was written", dataFiles.isEmpty());

            for (CarbonFile dataFile : dataFiles) {
                FileReader fileReader = FileFactory.getFileHolder(FileFactory.getFileType(dataFile.getPath()));
                ByteBuffer buffer;
                try {
                    // The trailing 8 bytes are read as a long and passed to the footer reader
                    // (presumably the footer offset; confirm against CarbonFooterReaderV3).
                    buffer = fileReader.readByteBuffer(FileFactory.getUpdatedFilePath(dataFile.getPath()),
                            dataFile.getSize() - 8, 8);
                } finally {
                    // Release the file reader even when the read fails.
                    fileReader.finish();
                }
                CarbonFooterReaderV3 footerReader = new CarbonFooterReaderV3(dataFile.getAbsolutePath(),
                        buffer.getLong());
                FileFooter3 footer = footerReader.readFooterVersion3();
                Assert.assertEquals(2, footer.blocklet_index_list.size());
                Assert.assertEquals(2, footer.blocklet_info_list3.size());
            }
        } finally {
            // Remove the output even when an assertion fails.
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes string/float/double columns and checks that the schema inferred from the
     * written files contains the STRING, DOUBLE and FLOAT data types.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testFloatDataType() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[3];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("floatField", DataTypes.FLOAT);
        fields[2] = new Field("doubleField", DataTypes.DOUBLE);

        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < 15; i++) {
                String decimalText = i + "." + i;
                writer.write(new String[] { "robot" + (i % 10), decimalText, decimalText });
            }
            writer.close();

            TableInfo tableInfo = SchemaReader.inferSchema(AbsoluteTableIdentifier.from(path, "", ""), false);
            List<String> dataTypes = new ArrayList<>();
            for (ColumnSchema columnSchema : tableInfo.getFactTable().getListOfColumns()) {
                dataTypes.add(columnSchema.getDataType().toString());
            }
            // JUnit assertions: the `assert` keyword is skipped unless the JVM runs with -ea.
            Assert.assertTrue("STRING column missing: " + dataTypes, dataTypes.contains("STRING"));
            Assert.assertTrue("DOUBLE column missing: " + dataTypes, dataTypes.contains("DOUBLE"));
            Assert.assertTrue("FLOAT column missing: " + dataTypes, dataTypes.contains("FLOAT"));
        } catch (Exception e) {
            // Keep the original exception as the cause of the failure.
            throw new AssertionError("Writing or inferring the float schema failed", e);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes string/byte columns and checks that the schema inferred from the written files
     * contains the STRING and BYTE data types.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testByteDataType() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("byteField", DataTypes.BYTE);

        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < 15; i++) {
                writer.write(new String[] { "robot" + (i % 10), "" + i });
            }
            writer.close();

            TableInfo tableInfo = SchemaReader.inferSchema(AbsoluteTableIdentifier.from(path, "", ""), false);
            List<String> dataTypes = new ArrayList<>();
            for (ColumnSchema columnSchema : tableInfo.getFactTable().getListOfColumns()) {
                dataTypes.add(columnSchema.getDataType().toString());
            }
            // JUnit assertions: the `assert` keyword is skipped unless the JVM runs with -ea.
            Assert.assertTrue("STRING column missing: " + dataTypes, dataTypes.contains("STRING"));
            Assert.assertTrue("BYTE column missing: " + dataTypes, dataTypes.contains("BYTE"));
        } catch (Exception e) {
            // Keep the original exception as the cause of the failure.
            throw new AssertionError("Writing or inferring the byte schema failed", e);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes 15 rows of (string, byte, float) and reads them back with {@link CarbonReader},
     * checking the Java type and the value of every column as well as the row count.
     *
     * <p>Each row is written as {@code ("robot" + (i % 10), i, i + "." + i)}, so the expected
     * string and float are derived from the row's own byte value. This keeps the check
     * independent of the order in which the reader returns rows.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testReadingOfByteAndFloatWithCarbonReader() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[3];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("byteField", DataTypes.BYTE);
        fields[2] = new Field("floatField", DataTypes.FLOAT);

        int rowCount = 15;
        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < rowCount; i++) {
                writer.write(new String[] { "robot" + (i % 10), "" + i, i + "." + i });
            }
            writer.close();

            CarbonReader carbonReader = new CarbonReaderBuilder(path, "table1").build();
            int rowsRead = 0;
            try {
                while (carbonReader.hasNext()) {
                    Object[] actualRow = (Object[]) carbonReader.readNextRow();
                    // JUnit assertions: the `assert` keyword is skipped unless the JVM runs with -ea.
                    Assert.assertTrue("byteField is not a Byte", actualRow[1] instanceof Byte);
                    Assert.assertTrue("floatField is not a Float", actualRow[2] instanceof Float);

                    // The original computed equalsIgnoreCase(...) and discarded the result, so
                    // no value was ever verified. Compare numerically: "10.10" reads back as 10.1.
                    byte id = (Byte) actualRow[1];
                    float actualFloat = (Float) actualRow[2];
                    Assert.assertEquals("robot" + (id % 10), actualRow[0].toString());
                    Assert.assertEquals(Float.parseFloat(id + "." + id), actualFloat, 0.0001f);
                    rowsRead++;
                }
            } finally {
                // Close the reader even when an assertion fails.
                carbonReader.close();
            }
            Assert.assertEquals(rowCount, rowsRead);
        } catch (Exception e) {
            // Keep the original exception as the cause of the failure.
            throw new AssertionError("Writing or reading byte/float columns failed", e);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes 15 rows into a single struct column made of string, byte and float children.
     * The child values of each row are joined with the {@code \001} character. Read-back
     * validation is still disabled (see the TODO below), so only the write path is exercised.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testWritingAndReadingStructOfFloat() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        StructField[] fields = new StructField[3];
        fields[0] = new StructField("stringField", DataTypes.STRING);
        fields[1] = new StructField("byteField", DataTypes.BYTE);
        fields[2] = new StructField("floatField", DataTypes.FLOAT);

        Field structType = new Field("structField", "struct", Arrays.asList(fields));

        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(new Field[] { structType }))
                    .writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < 15; i++) {
                String[] row = new String[] { "robot" + (i % 10) + "\001" + i + "\001" + i + "." + i };
                writer.write(row);
            }
            writer.close();
            //TODO: CarbonReader has a bug which does not allow reading complex. Once it is fixed below validation can be enabled
            //      CarbonReader carbonReader =
            //          new CarbonReaderBuilder(path, "table121").projection(new String[]{"structfield"}).build(TestUtil.configuration);
            //      for (int i = 0; i < 15; i++) {
            //        Object[] actualRow = (Object[])(carbonReader.readNextRow());
            //        String[] expectedRow = new String[] { "robot" + (i % 10), "" + i, i + "." + i };
            //        for (int j = 0; j < 3; j++) {
            //          ((Object[])actualRow[0])[j].toString().equalsIgnoreCase(expectedRow[j]);
            //        }
            //      }
        } catch (Exception e) {
            // Keep the original exception as the cause of the failure.
            throw new AssertionError("Writing the struct column failed", e);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes 15 identical rows into two array columns, one of floats and one of bytes. The
     * elements of each array are joined with the {@code \001} character. Only the write path
     * is exercised; nothing is read back.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testWritingAndReadingArrayOfFloatAndByte() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        StructField[] fields = new StructField[1];
        fields[0] = new StructField("floatField", DataTypes.FLOAT);

        Field structType1 = new Field("floatarray", "array", Arrays.asList(fields));
        StructField[] fields2 = new StructField[1];
        fields2[0] = new StructField("byteField", DataTypes.BYTE);
        Field structType2 = new Field("bytearray", "array", Arrays.asList(fields2));

        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(new Field[] { structType1, structType2 }))
                    .writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < 15; i++) {
                String[] row = new String[] { "1.0\0012.0\0013.0", "1\0012\0013" };
                writer.write(row);
            }
            writer.close();
        } catch (Exception e) {
            // Keep the original exception as the cause of the failure.
            throw new AssertionError("Writing the array columns failed", e);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

    /**
     * Writes three rows out of order with the {@code sort_columns=name} table property and
     * expects the reader to return them as name1, name2, name3.
     *
     * @throws IOException if the output directory cannot be deleted
     */
    @Test
    public void testWithTableProperties() throws IOException {
        String path = "./testWriteFiles";
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);

        try {
            CarbonWriter writer = CarbonWriter.builder().taskNo(5).outputPath(path).withCsvInput(new Schema(fields))
                    .writtenBy("CSVCarbonWriterTest").withTableProperty("sort_columns", "name").build();
            writer.write(new String[] { "name3", "21" });
            writer.write(new String[] { "name1", "7" });
            writer.write(new String[] { "name2", "18" });
            writer.close();

            CarbonReader reader = CarbonReader.builder(path, "test").build();
            int rowsRead = 0;
            try {
                while (reader.hasNext()) {
                    rowsRead++;
                    Object[] row = (Object[]) reader.readNextRow();
                    Assert.assertTrue("Unexpected name at position " + rowsRead + ": " + row[0],
                            ("name" + rowsRead).equalsIgnoreCase(row[0].toString()));
                }
            } finally {
                // The original never closed the reader.
                reader.close();
            }
            // Without this check the test would pass even if the reader returned no rows.
            Assert.assertEquals(3, rowsRead);
        } catch (Exception e) {
            // Keep the original exception as the cause of the failure.
            throw new AssertionError("Writing or reading with table properties failed", e);
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }

}