TestColumnStorageInputFormat.java Source code

Java tutorial

Introduction

Here is the source code for TestColumnStorageInputFormat.java:

Source

/**
* Tencent is pleased to support the open source community by making TDW available.
* Copyright (C) 2014 THL A29 Limited, a Tencent company. All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use 
* this file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed 
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
* OF ANY KIND, either express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

import java.io.IOException;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.Delayed;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.shell.Count;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

import Comm.ConstVar;
import FormatStorage.FieldMap;
import FormatStorage.Head;
import FormatStorage.Unit;
import FormatStorage.FieldMap.Field;
import FormatStorage.Unit.FieldValue;
import FormatStorage.Unit.Record;
import StorageEngineClient.ColumnStorageInputFormat;
import StorageEngineClient.ColumnStorageSplit;
import StorageEngineClient.FormatStorageInputFormat;
import StorageEngineClient.FormatStorageOutputFormat;
import StorageEngineClient.FormatStorageSerDe;
import StorageEngineClient.FormatStorageSplit;

public class TestColumnStorageInputFormat {
    public static void main(String[] argv) throws IOException, SerDeException {
        try {
            if (argv.length != 2) {
                System.out.println("TestColumnStorageInputFormat <input> idx");
                System.exit(-1);
            }

            JobConf conf = new JobConf(TestColumnStorageInputFormat.class);

            conf.setJobName("TestColumnStorageInputFormat");

            conf.setNumMapTasks(1);
            conf.setNumReduceTasks(1);

            conf.setOutputKeyClass(LongWritable.class);
            conf.setOutputValueClass(Unit.Record.class);

            conf.setInputFormat(TextInputFormat.class);
            conf.set("mapred.output.compress", "flase");

            conf.set("mapred.input.dir", argv[0]);

            conf.set("hive.io.file.readcolumn.ids", argv[1]);

            FormatStorageSerDe serDe = initSerDe(conf);
            StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
            List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

            FileInputFormat.setInputPaths(conf, argv[0]);
            Path outputPath = new Path(argv[1]);
            FileOutputFormat.setOutputPath(conf, outputPath);

            InputFormat inputFormat = new ColumnStorageInputFormat();
            long begin = System.currentTimeMillis();
            InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
            long end = System.currentTimeMillis();
            System.out.println("getsplit delay " + (end - begin) + " ms");

            if (inputSplits.length == 0) {
                System.out.println("inputSplits is empty");
                return;
            } else {
                System.out.println("get Splits:" + inputSplits.length);
            }

            int size = inputSplits.length;
            System.out.println("getSplits return size:" + size);
            for (int i = 0; i < size; i++) {
                ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
                System.out.printf("split:" + i + " offset:" + split.getStart() + "len:" + split.getLength()
                        + "path:" + split.getPath().toString() + "beginLine:" + split.getBeginLine() + "endLine:"
                        + split.getEndLine());
                if (split.getFileName() != null) {
                    System.out.println("fileName:" + split.getFileName());
                } else {
                    System.out.println("fileName null");
                }
                if (split.fileList() != null) {
                    System.out.println("fileList.num:" + split.fileList().size());
                    for (int j = 0; j < split.fileList().size(); j++) {
                        System.out.println("filelist " + j + ":" + split.fileList().get(j));
                    }
                }
            }

            while (true) {
                int totalDelay = 0;
                RecordReader<WritableComparable, Writable> currRecReader = null;
                for (int i = 0; i < inputSplits.length; i++) {
                    currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

                    WritableComparable key;
                    Writable value;

                    key = currRecReader.createKey();
                    value = currRecReader.createValue();

                    begin = System.currentTimeMillis();
                    int count = 0;
                    while (currRecReader.next(key, value)) {

                        Record record = (Record) value;

                        Object row = serDe.deserialize(record);
                        count++;

                    }
                    end = System.currentTimeMillis();

                    long delay = (end - begin) / 1000;
                    totalDelay += delay;
                    System.out.println(count + " record read over, delay " + delay + " s");
                }

                System.out.println("total delay:" + totalDelay + "\n");
            }

        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("get exception:" + e.getMessage());
        }
    }

    public static void initHead(Head head) {
        short fieldNum = 7;
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
        fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
        fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
        fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));

        head.setFieldMap(fieldMap);

        head.setVar(ConstVar.VarFlag);
    }

    public static FormatStorageSerDe initSerDe(Configuration conf) throws SerDeException {
        FormatStorageSerDe serDe = new FormatStorageSerDe();
        Properties tbl = createProperties();

        serDe.initialize(conf, tbl);

        /*        Record record = new Record((short) 7);
                record.addValue(new FieldValue((byte)1, (short)0));
                record.addValue(new FieldValue((short)2, (short)1));
                record.addValue(new FieldValue((int)3, (short)2));
                record.addValue(new FieldValue((long)4, (short)3));
                record.addValue(new FieldValue((float)5.5, (short)4));
                record.addValue(new FieldValue((double)6.6, (short)5));
                record.addValue(new FieldValue("hello konten", (short)6));
                    
                    
                Object[] refer = {new Byte((byte) 1),new Short((short) 2), new Integer(3), new Long(4),
                      new Float(5.5), new Double(6.6), new String("hello konten")};
                    
          */
        return serDe;
    }

    public static Properties createProperties() {
        Properties tbl = new Properties();

        /*
        tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
            
        tbl.setProperty("columns","abyte,ashort,aint,along,afloat,adouble,astring");
            
        tbl.setProperty("columns.types","tinyint:smallint:int:bigint:float:double:string");
        tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
        */

        tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");

        tbl.setProperty("columns",
                "insertdate,flag,pindao,qq_num,pv,time_0,time_1,time_2,time_3,time_4,time_5,time_6,time_7,time_8,time_9,time_10,time_11,time_12,time_13,time_14,time_15,time_16,time_17,time_18,time_19,time_20,time_21,time_22,time_23,day_1,day_2,day_3,day_4,day_5,day_6,day_7,day_8,day_9,day_10 ,day_11 ,day_12 ,day_13 ,day_14 ,day_15 ,day_16 ,day_17 ,day_18 ,day_19 ,day_20 ,day_21 ,day_22 ,day_23 ,day_24 ,day_25 ,day_26 ,day_27 ,day_28,day_29,day_30,day_31,ref_minisite_pv,ref_tips_pvref_mail_pv,ref_soso_pv,ref_minigame_pv,ref_own_pv");

        tbl.setProperty("columns.types",
                "string,string,string,int,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint,smallint");
        tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
        return tbl;
    }

}