ColumnStoragePerformanceTest.java Source code

Java tutorial

Introduction

Here is the source code for ColumnStoragePerformanceTest.java

Source

/**
* Tencent is pleased to support the open source community by making TDW available.
* Copyright (C) 2014 THL A29 Limited, a Tencent company. All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use 
* this file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed 
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
* OF ANY KIND, either express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Vector;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.RecordReader;

import ColumnStorage.ColumnStorageClient;
import Comm.ConstVar;
import FormatStorage.FieldMap;
import FormatStorage.FormatDataFile;
import FormatStorage.Head;
import FormatStorage.Unit;
import FormatStorage.FieldMap.Field;
import FormatStorage.Unit.FieldValue;
import FormatStorage.Unit.Record;

public class ColumnStoragePerformanceTest {
    /** Local file that receives the progress and summary lines of a run. */
    static File file = new File("ColumnStoragePerformanceTest.result");
    /** Stream onto {@link #file}; opened by main and written to by the benchmark methods. */
    static FileOutputStream output = null;

    // Path prefixes of the text, column-storage and format-storage test data.
    static String textPrefix = "/user/tdwadmin/se_test/cs/";
    static String columnPrefix = "/user/tdwadmin/se_test/cs/perf/";
    static String formatPrefix = "/user/tdwadmin/se_test/cs/";
    static String textFilename = textPrefix + "Text";
    static String formatStorageFilename = formatPrefix + "formatStorageFile";

    // Base names of the single-column files, one per column kind; initOtherFD appends the
    // field index to these names.
    static String byteFileName = columnPrefix + "Column_Byte";
    static String shortFileName = columnPrefix + "Column_Short";
    static String intFileName = columnPrefix + "Column_Int";
    static String longFileName = columnPrefix + "Column_Long";
    static String floatFileName = columnPrefix + "Column_Float";
    static String doubleFileName = columnPrefix + "Column_Double";
    static String stringFileName = columnPrefix + "Column_String";
    /** Name of the file that stores fields 0..6 together. */
    static String multiFileNameString = columnPrefix + "Column_Multi";

    /** Number of records to write or read; overwritten from the command line in main. */
    static int count = 10 * 1000 * 1000;
    /** Hadoop configuration shared by the column-storage methods; created in main. */
    static Configuration conf = null;

    /** Open single-column files, indexed by field number (slots 7..34 are filled by initOtherFD). */
    static FormatDataFile[] fds = new FormatDataFile[35];

    // Parallel arrays indexed by column kind (0 = byte ... 6 = string).
    static String[] names = {
            byteFileName,
            shortFileName,
            intFileName,
            longFileName,
            floatFileName,
            doubleFileName,
            stringFileName };
    static byte[] types = {
            ConstVar.FieldType_Byte,
            ConstVar.FieldType_Short,
            ConstVar.FieldType_Int,
            ConstVar.FieldType_Long,
            ConstVar.FieldType_Float,
            ConstVar.FieldType_Double,
            ConstVar.FieldType_String };
    static int[] lens = {
            ConstVar.Sizeof_Byte,
            ConstVar.Sizeof_Short,
            ConstVar.Sizeof_Int,
            ConstVar.Sizeof_Long,
            ConstVar.Sizeof_Float,
            ConstVar.Sizeof_Double,
            0 };
    // One boxed sample value per column kind (Byte, Short, Integer, Long, Float, Double, String).
    static Object[] values = { (byte) 1, (short) 2, 3, 4L, 5.5f, 6.6, "hello konten" };

    /**
     * Compression flag handed to Head.setCompress; doReadSeq also treats the value 1 as
     * "read with getNextRecord instead of getRecordByLine". Overwritten from the command line.
     */
    static byte compress = 1;

    /**
     * Command-line entry point of the benchmark.
     *
     * <p>Expects exactly three arguments: the command ({@code write}, {@code readSeq} or
     * {@code readRand}), the record count and the compress flag. Any command other than
     * {@code write} and {@code readRand} runs the sequential read, as before.
     *
     * <p>The result file is opened before the command runs and is always closed afterwards,
     * including when the command fails.
     *
     * @param argv command, record count, compress flag
     */
    public static void main(String[] argv) {
        try {
            if (argv.length != 3) {
                System.out.println(
                        "usage: ColumnStoragePerformanceTest cmd[write | readSeq | readRand] Count Compress\n");
                return;
            }

            String cmd = argv[0];
            count = Integer.parseInt(argv[1]);
            compress = Byte.parseByte(argv[2]);

            System.out.println("cmd:" + cmd + ",count:" + count);

            output = new FileOutputStream(file);
            try {
                conf = new Configuration();
                conf.setInt("dfs.replication", 1);

                if ("write".equals(cmd)) {
                    doInitFile();
                } else if ("readRand".equals(cmd)) {
                    doReadRand();
                } else {
                    // "readSeq" and, for backward compatibility, every unrecognised command.
                    doReadSeq();
                }
            } finally {
                // Release the result file even when the command throws.
                output.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("get exception:" + e.getMessage());
        }
    }

    /**
     * Handler of the "write" command: creates the test data by delegating to
     * {@link #doInitColumnStorageFile()}.
     *
     * @throws Exception whatever doInitColumnStorageFile() throws
     */
    static void doInitFile() throws Exception {
        doInitColumnStorageFile();
    }

    /**
     * Writes {@code count} copies of a fixed 35-field comma-separated line to
     * {@code textFilename} and reports the elapsed time.
     *
     * <p>A progress line is appended to the result file every 1,000,000 records. Failures are
     * printed, not propagated. The writer (and with it the underlying file-system stream) is
     * closed on every path, including when a write fails.
     */
    static void doInitTextFile() {
        OutputStream out = null;
        BufferedWriter writer = null;
        try {
            Path path = new Path(textFilename);
            FileSystem fs = FileSystem.get(conf);
            out = fs.create(path);

            OutputStream stream = new BufferedOutputStream(out);
            writer = new BufferedWriter(new OutputStreamWriter(stream));

            String value = "111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten\n";

            long begin = System.currentTimeMillis();

            for (int i = 0; i < count; i++) {
                writer.write(value);

                if (i % 1000000 == 0) {
                    String string = "write " + i + " record, delay: "
                            + ((System.currentTimeMillis() - begin) / 1000) + " s \n";
                    output.write(string.getBytes());
                }
            }
            // Close before taking the end timestamp so that the final flush is part of the
            // measured time; closing the writer also closes the wrapped streams.
            writer.close();

            long end = System.currentTimeMillis();

            String string = "write " + count + " record over(text), delay: " + ((end - begin) / 1000) + " s \n";
            output.write(string.getBytes());
            System.out.println(string);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println(e.getMessage());
        } finally {
            // Failure path: make sure the stream is released. Closing an already closed
            // BufferedWriter is a no-op, so the success path is unaffected.
            try {
                if (writer != null) {
                    writer.close();
                } else if (out != null) {
                    out.close();
                }
            } catch (IOException e) {
                System.out.println("close failed:" + e.getMessage());
            }
        }
    }

    /**
     * Creates one single-column data file for each of the fields 7..34 and stores the open
     * file in {@code fds} under the field's index.
     *
     * <p>Field {@code group * 7 + column} uses the type, length and base file name of column
     * kind {@code column}; the field index is appended to the base name.
     *
     * @throws Exception if a file cannot be created
     */
    static void initOtherFD() throws Exception {
        for (int group = 1; group < 5; group++) {
            for (int column = 0; column < 7; column++) {
                final int fieldIndex = group * 7 + column;
                final String singleFileName = names[column] + fieldIndex;
                System.out.println("write single file:" + singleFileName);

                FieldMap singleFieldMap = new FieldMap();
                singleFieldMap.addField(new Field(types[column], lens[column], (short) fieldIndex));

                Head singleHead = new Head();
                singleHead.setFieldMap(singleFieldMap);
                singleHead.setCompress(compress);

                // Register the file before create() so that closeOtherFD() sees it.
                FormatDataFile dataFile = new FormatDataFile(conf);
                fds[fieldIndex] = dataFile;
                dataFile.create(singleFileName, singleHead);
            }
        }
    }

    /**
     * Closes every file currently held in {@code fds}.
     *
     * <p>All files are attempted even if one of them fails to close; the first failure is
     * rethrown once the loop has finished, later failures are only printed.
     *
     * @throws Exception the first exception raised by a {@code close()} call
     */
    static void closeOtherFD() throws Exception {
        Exception firstFailure = null;
        for (int i = 0; i < fds.length; i++) {
            if (fds[i] == null) {
                continue;
            }
            try {
                fds[i].close();
            } catch (Exception e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    System.out.println("close failed, index:" + i + ", " + e.getMessage());
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Writes {@code count} records into the column-storage layout: fields 0..6 go together
     * into {@code multiFileNameString}, fields 7..34 go into one file each (see
     * {@link #initOtherFD()}).
     *
     * <p>A progress line is appended to the result file every 1,000,000 records and a summary
     * is printed at the end. The measured time covers writing and closing the files.
     *
     * <p>If writing fails, all files opened so far are closed on a best-effort basis and the
     * original exception is rethrown.
     *
     * @throws Exception if a file cannot be created, written or closed
     */
    public static void doInitColumnStorageFile() throws Exception {
        System.out.println("write multi file:" + multiFileNameString);

        FieldMap fieldMap06 = new FieldMap();
        fieldMap06.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap06.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        fieldMap06.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
        fieldMap06.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
        fieldMap06.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
        fieldMap06.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
        fieldMap06.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));

        Head head06 = new Head();
        head06.setFieldMap(fieldMap06);
        head06.setCompress(compress);

        FormatDataFile fd06 = new FormatDataFile(conf);
        fd06.create(multiFileNameString, head06);

        long begin;
        try {
            initOtherFD();

            begin = System.currentTimeMillis();
            for (int i = 0; i < count; i++) {
                createProject0_6(fd06, i);
                createProject7_34(i);

                if (i % 1000000 == 0) {
                    long end = System.currentTimeMillis();
                    String string = i + " record ok, delay:" + (end - begin) / 1000 + " s \n";
                    output.write(string.getBytes());
                }
            }
        } catch (Exception e) {
            // Best-effort cleanup: release what was opened, but report the original failure.
            try {
                fd06.close();
            } catch (Exception closeFailure) {
                System.out.println("close failed:" + closeFailure.getMessage());
            }
            try {
                closeOtherFD();
            } catch (Exception closeFailure) {
                System.out.println("close failed:" + closeFailure.getMessage());
            }
            throw e;
        }

        try {
            fd06.close();
        } finally {
            // Close the single-column files even if closing the multi-column file failed.
            closeOtherFD();
        }

        long end = System.currentTimeMillis();
        String string = count + " record over(column), delay:" + (end - begin) / 1000 + " s \n";
        System.out.println(string);
        output.write(string.getBytes());

    }

    /**
     * Writes {@code count} records of 35 fields (five repetitions of byte, short, int, long,
     * float, double, string) into the single file {@code formatStorageFilename}.
     *
     * <p>A progress line is appended to the result file every 1,000,000 records; the summary
     * is written to the result file and printed. The file is created with a fresh
     * {@code Configuration}, not with the shared {@code conf}.
     *
     * @throws Exception if the file cannot be created, written or closed
     */
    public static void doInitFormatStorageFile() throws Exception {
        FieldMap schema = new FieldMap();
        for (int group = 0; group < 5; group++) {
            final int base = group * 7;
            schema.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) base));
            schema.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) (base + 1)));
            schema.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) (base + 2)));
            schema.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) (base + 3)));
            schema.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) (base + 4)));
            schema.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) (base + 5)));
            schema.addField(new Field(ConstVar.FieldType_String, 0, (short) (base + 6)));
        }
        Head fileHead = new Head();
        fileHead.setFieldMap(schema);

        System.out.println("write file:" + formatStorageFilename);
        FormatDataFile dataFile = new FormatDataFile(new Configuration());
        dataFile.create(formatStorageFilename, fileHead);

        final long startMillis = System.currentTimeMillis();
        for (int row = 0; row < count; row++) {
            Record row35 = new Record((short) 35);

            for (int group = 0; group < 5; group++) {
                final int base = group * 7;
                row35.addValue(new FieldValue((byte) 1, (short) base));
                row35.addValue(new FieldValue((short) 2, (short) (base + 1)));
                row35.addValue(new FieldValue(3, (short) (base + 2)));
                row35.addValue(new FieldValue(4L, (short) (base + 3)));
                row35.addValue(new FieldValue(5.5f, (short) (base + 4)));
                row35.addValue(new FieldValue(6.6, (short) (base + 5)));
                row35.addValue(new FieldValue("hello konten", (short) (base + 6)));
            }
            dataFile.addRecord(row35);

            if (row % 1000000 != 0) {
                continue;
            }
            long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
            String progress = row + " record ok(format), delay:" + elapsedSeconds + " s \n";
            output.write(progress.getBytes());
        }

        dataFile.close();
        long totalSeconds = (System.currentTimeMillis() - startMillis) / 1000;
        String summary = count + " record over(formatStorage), delay:" + totalSeconds + " s \n";
        output.write(summary.getBytes());
        System.out.println(summary);

    }

    /**
     * Appends one seven-field record (fields 0..6) to {@code fd}. Every value is derived from
     * {@code line}, so consecutive records differ.
     *
     * @param fd   open file that receives the record
     * @param line zero-based record number used to derive the field values
     * @throws Exception if the record cannot be added
     */
    public static void createProject0_6(FormatDataFile fd, int line) throws Exception {
        // Typed locals keep the narrowing/widening of the original casts explicit.
        final byte byteValue = (byte) (1 + line);
        final short shortValue = (short) (2 + line);
        final int intValue = 3 + line;
        final long longValue = 4 + line; // int addition first, then widened
        final float floatValue = (float) (5.5 + line);
        final double doubleValue = 6.6 + line;
        final String stringValue = "hello konten" + line;

        Record row = new Record((short) 7);
        row.addValue(new FieldValue(byteValue, (short) 0));
        row.addValue(new FieldValue(shortValue, (short) 1));
        row.addValue(new FieldValue(intValue, (short) 2));
        row.addValue(new FieldValue(longValue, (short) 3));
        row.addValue(new FieldValue(floatValue, (short) 4));
        row.addValue(new FieldValue(doubleValue, (short) 5));
        row.addValue(new FieldValue(stringValue, (short) 6));

        fd.addRecord(row);
    }

    /**
     * Appends one single-field record to each of the files of fields 7..34 held in
     * {@code fds}. The value type follows the column kind ({@code fieldIndex % 7}: byte,
     * short, int, long, float, double, string) and the value is derived from {@code line}.
     *
     * @param line zero-based record number used to derive the field values
     * @throws Exception if a record cannot be added
     */
    public static void createProject7_34(int line) throws Exception {
        for (int group = 1; group < 5; group++) {
            for (int column = 0; column < 7; column++) {
                final int fieldIndex = group * 7 + column;
                final short fieldId = (short) fieldIndex;

                Record single = new Record((short) 1);
                switch (column) {
                case 0:
                    single.addValue(new FieldValue((byte) (1 + line), fieldId));
                    break;
                case 1:
                    single.addValue(new FieldValue((short) (2 + line), fieldId));
                    break;
                case 2:
                    single.addValue(new FieldValue((int) (3 + line), fieldId));
                    break;
                case 3:
                    single.addValue(new FieldValue((long) (4 + line), fieldId));
                    break;
                case 4:
                    single.addValue(new FieldValue((float) (5.5 + line), fieldId));
                    break;
                case 5:
                    single.addValue(new FieldValue((double) (6.6 + line), fieldId));
                    break;
                default:
                    single.addValue(new FieldValue("hello konten" + line, fieldId));
                    break;
                }

                fds[fieldIndex].addRecord(single);
            }
        }
    }

    /**
     * Handler of the "readRand" command: repeats the random-read benchmark
     * ({@link #doFormatReadRand(int)} with the global {@code count}) forever. The method never
     * returns; the process has to be stopped from outside.
     */
    static void doReadRand() {
        for (;;) {
            doFormatReadRand(count);
        }
    }

    /**
     * Handler of the sequential-read command: reads {@code count} records for seven different
     * field selections and prints the elapsed seconds of each selection.
     *
     * <p>The selections are, in order: fields 0..6 (stored in one file), fields 7..17,
     * fields 7..13, fields {9, 10, 13}, fields {9, 16, 23}, fields {13, 20, 27} and all
     * fields 0..34.
     *
     * @throws Exception if a read pass fails
     */
    static void doReadSeq() throws Exception {
        final boolean compressed = (compress == 1);
        // One list instance is reused (cleared and refilled) for all passes.
        final ArrayList<Short> idx = new ArrayList<Short>(10);

        runColumnReadSeqCase(idx, fieldRange(0, 6), "7 field same file", compressed);
        runColumnReadSeqCase(idx, fieldRange(7, 17), "11 field diff file", compressed);
        runColumnReadSeqCase(idx, fieldRange(7, 13), "7 field diff file", compressed);
        runColumnReadSeqCase(idx, new short[] { 9, 10, 13 }, "3 field diff file", compressed);
        runColumnReadSeqCase(idx, new short[] { 9, 16, 23 }, "3 field diff file, not var", compressed);
        runColumnReadSeqCase(idx, new short[] { 13, 20, 27 }, "3 field diff file, var", compressed);
        runColumnReadSeqCase(idx, fieldRange(0, 34), "35 field", compressed);
    }

    /** Returns the field indexes {@code first..last} (both inclusive) in ascending order. */
    private static short[] fieldRange(int first, int last) {
        short[] range = new short[last - first + 1];
        for (int i = 0; i < range.length; i++) {
            range[i] = (short) (first + i);
        }
        return range;
    }

    /**
     * Refills {@code idx} with {@code fields}, runs one timed doColumnReadSeq pass over
     * {@code count} records and prints the summary line for {@code label}.
     */
    private static void runColumnReadSeqCase(ArrayList<Short> idx, short[] fields, String label,
            boolean compressed) throws Exception {
        idx.clear();
        for (int i = 0; i < fields.length; i++) {
            idx.add(fields[i]);
        }

        // Only the read itself is timed, not the list set-up.
        long startMillis = System.currentTimeMillis();
        doColumnReadSeq(idx, count, compressed);
        long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;

        System.out.println("Read Column Seq over(" + label + ") , count:" + count + ", delay:"
                + elapsedSeconds + " s");
    }

    /**
     * Reads {@code count} records of the selected fields from the column storage under
     * {@code columnPrefix}.
     *
     * <p>When {@code compress} is false each record is fetched by line number
     * ({@code getRecordByLine}); otherwise the records are fetched one after another with
     * {@code getNextRecord}. The records themselves are discarded, only the read cost matters.
     *
     * <p>A failing read is fatal for the benchmark: the exception is reported and the JVM is
     * terminated with exit status 1. (The status used to be the line number, which reported
     * success when the very first record failed.)
     *
     * @param idx      indexes of the fields to read
     * @param count    number of records to read
     * @param compress selects {@code getNextRecord} (true) or {@code getRecordByLine} (false)
     * @throws Exception if the client cannot be created or closed
     */
    static void doColumnReadSeq(ArrayList<Short> idx, int count, boolean compress) throws Exception {
        Path path = new Path(columnPrefix);
        ColumnStorageClient client = new ColumnStorageClient(path, idx, conf);

        try {
            for (int i = 0; i < count; i++) {
                try {
                    if (!compress) {
                        client.getRecordByLine(i);
                    } else {
                        client.getNextRecord();
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    System.out.println("get exception, line:" + i);
                    // Always a non-zero status so that callers of the process see the failure.
                    System.exit(1);
                    break;
                }
            }
        } finally {
            client.close();
        }

    }

    /**
     * Reads {@code count} randomly chosen records (line numbers in {@code [0, count)}) from
     * the file {@code MR_input/testMassRecord}.
     *
     * <p>Lines for which no record is returned are noted in the result file; a progress line
     * is written every 10,000 reads and a summary at the end. Failures are printed, not
     * propagated.
     *
     * <p>The data file is closed on every path once it has been opened; doReadRand calls this
     * method in an endless loop, so an unclosed file would leak on every pass.
     *
     * @param count number of reads and exclusive upper bound of the random line numbers
     */
    static void doFormatReadRand(int count) {
        FormatDataFile opened = null;
        try {
            String fileName = "MR_input/testMassRecord";
            Configuration conf = new Configuration();
            FormatDataFile fd = new FormatDataFile(conf);
            fd.open(fileName);
            // Only schedule the close once open() has succeeded.
            opened = fd;

            long begin = System.currentTimeMillis();
            for (int i = 0; i < count; i++) {
                int rand = (int) (Math.random() * count);

                Record record = fd.getRecordByLine(rand);
                if (record == null) {
                    String string = "record no:" + rand + " return null";
                    output.write(string.getBytes());
                }

                if (i % (1 * 10000) == 0) {
                    String string = "read format rand " + i + " record, delay:"
                            + ((System.currentTimeMillis() - begin) / 1000) + " s \n";
                    output.write(string.getBytes());
                }
            }
            long end = System.currentTimeMillis();
            String string = "Read Foramt Rand over, count:" + count + ", delay:" + (long) ((end - begin) / 1000)
                    + " s";
            output.write(string.getBytes());
            System.out.println(string);
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("get IOException:" + e.getMessage());
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("get exception:" + e.getMessage());
        } finally {
            if (opened != null) {
                try {
                    opened.close();
                } catch (Exception e) {
                    e.printStackTrace();
                    System.out.println("get exception:" + e.getMessage());
                }
            }
        }
    }

    /**
     * Reads the records on lines {@code 0..count-1} of {@code formatStorageFilename} one by
     * one and reports the elapsed time in the result file and on standard output.
     *
     * <p>Failures are printed, not propagated. The data file is closed on every path once it
     * has been opened.
     *
     * @param count number of records to read
     */
    static void doFormatReadSeq(int count) {
        FormatDataFile opened = null;
        try {
            FormatDataFile fd = new FormatDataFile(conf);
            fd.open(formatStorageFilename);
            // Only schedule the close once open() has succeeded.
            opened = fd;

            long begin = System.currentTimeMillis();

            Record record = new Record((short) 35);
            for (int i = 0; i < count; i++) {
                // getRecordByLine may hand back null (doFormatReadRand checks for it), so the
                // record of the previous iteration must not be cleared blindly.
                if (record != null) {
                    record.clear();
                }
                record = fd.getRecordByLine(i);
            }

            long end = System.currentTimeMillis();
            String string = "Read Foramt Seq over, count:" + count + ", delay:" + (long) ((end - begin) / 1000)
                    + " s";
            output.write(string.getBytes());
            System.out.println(string);
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("get IOException:" + e.getMessage());
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("get exception:" + e.getMessage());
        } finally {
            if (opened != null) {
                try {
                    opened.close();
                } catch (Exception e) {
                    e.printStackTrace();
                    System.out.println("get exception:" + e.getMessage());
                }
            }
        }
    }

    /**
     * Stub of a second sequential-read benchmark over {@code MR_input/testMassRecord}.
     *
     * <p>As written, the method opens the file but reads nothing: the record is always null
     * and the counter stays 0, so it emits one "return null" note, one progress line and a
     * summary with count 0, then closes the file. Failures are printed, not propagated.
     *
     * @param count not used by the current implementation
     */
    static void doFormatReadSeq2(int count) {
        try {
            Configuration localConf = new Configuration();
            FormatDataFile dataFile = new FormatDataFile(localConf);
            dataFile.open("MR_input/testMassRecord");

            final long startMillis = System.currentTimeMillis();

            int readCount = 0;
            Record current = null; // nothing is read, so this stays null
            if (current == null) {
                output.write(("seq 2 record no:" + readCount + " return null").getBytes());
            }
            if (readCount % (1 * 10000) == 0) {
                long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
                output.write(("read format seq2 " + readCount + " record, delay:" + elapsedSeconds
                        + " s \n").getBytes());
            }

            long totalSeconds = (System.currentTimeMillis() - startMillis) / 1000;
            String summary = "Read Foramt Seq2 over, count:" + readCount + ", delay:" + totalSeconds + " s";
            output.write(summary.getBytes());
            System.out.println(summary);
            dataFile.close();
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
            System.out.println("get IOException:" + ioFailure.getMessage());
        } catch (Exception failure) {
            failure.printStackTrace();
            System.out.println("get exception:" + failure.getMessage());
        }
    }

    /**
     * Simulates random access on the text file {@code MR_input_text/testPerformanceReadText}:
     * 1,000 times a random number of lines (below 1,000) is read forward from the current
     * position. The elapsed time is written to the result file and printed.
     *
     * <p>Failures are printed, not propagated. The reader is closed on every path.
     *
     * @param count ignored; the number of iterations is fixed at 1,000, as in the original
     *              implementation, which overwrote the argument (NOTE(review): presumably to
     *              bound the run time, confirm before honouring the argument)
     */
    static void doTextReadRand(int count) {
        final int iterations = 1 * 1000;
        BufferedReader reader = null;
        try {
            String textFile = "MR_input_text/testPerformanceReadText";
            Path path = new Path(textFile);
            FileSystem fs = FileSystem.get(new Configuration());
            FSDataInputStream in = fs.open(path);

            InputStream stream = new BufferedInputStream(in);
            reader = new BufferedReader(new InputStreamReader(stream));

            long begin = System.currentTimeMillis();
            for (int i = 0; i < iterations; i++) {
                int line = (int) (Math.random() * iterations);
                for (int j = 0; j < line; j++) {
                    // The content is irrelevant; at end of file readLine() simply returns null.
                    reader.readLine();
                }
            }
            // Close before taking the end timestamp, as before.
            reader.close();

            long end = System.currentTimeMillis();
            String string = "text read rand, count:" + iterations + ", delay:" + (long) ((end - begin) / 1000)
                    + " s";
            output.write(string.getBytes());
            System.out.println(string);

        } catch (Exception e) {
            e.printStackTrace();
            System.out.println(e.getMessage());
        } finally {
            // Failure path; closing an already closed BufferedReader is a no-op.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    System.out.println("close failed:" + e.getMessage());
                }
            }
        }
    }

    /**
     * Reads up to {@code count} lines from {@code textFilename} and performs a fixed amount
     * of per-line work (a split on ',' and six fixed-width byte reads) to imitate field
     * extraction. The elapsed time is printed.
     *
     * <p>Reading stops at end of file; the summary reports the number of lines actually read,
     * which equals {@code count} when the file is long enough. Failures are printed, not
     * propagated. The reader is closed on every path.
     *
     * @param count maximum number of lines to read
     */
    static void doTextReadSeq(int count) {
        BufferedReader reader = null;
        try {
            ArrayList<Integer> meta = new ArrayList<Integer>(10);
            for (int i = 0; i < 7; i++) {
                meta.add(i);
            }

            Path path = new Path(textFilename);
            FileSystem fs = FileSystem.get(new Configuration());
            FSDataInputStream in = fs.open(path);

            InputStream stream = new BufferedInputStream(in);
            reader = new BufferedReader(new InputStreamReader(stream));

            int linesRead = 0;
            long begin = System.currentTimeMillis();
            for (int i = 0; i < count; i++) {
                String value = reader.readLine();
                if (value == null) {
                    // End of file before count lines: stop instead of failing on null.
                    break;
                }
                linesRead++;

                // The results below are deliberately discarded: the calls only stand in for
                // the cost of parsing a line and are part of the measured workload.
                value.split(",");

                ByteArrayInputStream bin = new ByteArrayInputStream(value.getBytes());

                meta.get(0);
                byte[] bb = new byte[4];
                bin.read(bb);
                meta.get(1);
                byte[] sb = new byte[6];
                bin.read(sb);
                meta.get(2);
                byte[] ib = new byte[9];
                bin.read(ib);
                meta.get(3);
                byte[] lb = new byte[13];
                bin.read(lb);
                meta.get(4);
                byte[] fb = new byte[13];
                bin.read(fb);
                meta.get(5);
                byte[] db = new byte[18];
                bin.read(db);
                meta.get(6);
            }
            // Close before taking the end timestamp, as before.
            reader.close();

            long end = System.currentTimeMillis();

            String string = "text read seq " + linesRead + " record over, delay: " + ((end - begin) / 1000)
                    + " s \n";
            System.out.println(string);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println(e.getMessage());
        } finally {
            // Failure path; closing an already closed BufferedReader is a no-op.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    System.out.println("close failed:" + e.getMessage());
                }
            }
        }
    }
}