Text2FormatStorageMR.java Source code

Java tutorial

Introduction

Here is the source code for Text2FormatStorageMR.java

Source

/**
* Tencent is pleased to support the open source community by making TDW available.
* Copyright (C) 2014 THL A29 Limited, a Tencent company. All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use 
* this file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed 
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
* OF ANY KIND, either express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

import java.io.IOException;

import java.util.Iterator;
import java.text.SimpleDateFormat;
import java.util.Calendar;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import Comm.ConstVar;
import FormatStorage.FieldMap;
import FormatStorage.Head;
import FormatStorage.Unit;
import FormatStorage.Unit.FieldValue;
import FormatStorage.Unit.Record;
import FormatStorage.FieldMap.Field;
import StorageEngineClient.FormatStorageInputFormat;
import StorageEngineClient.FormatStorageOutputFormat;
import StorageEngineClient.FormatStorageSplit;

public class Text2FormatStorageMR {
    public static final Log LOG = LogFactory.getLog("Text2FormatMR");

    /**
     * Mapper that turns one comma-separated text line into a {@link Record}
     * whose first seven fields are typed byte, short, int, long, float,
     * double and string, and emits it under a random key in the range
     * [0, 100).
     *
     * Lines with fewer than seven fields, or whose fields cannot be converted,
     * are skipped and counted in the "Text2FormatMR" / "MalformedLines"
     * counter. I/O failures raised while emitting a record are propagated to
     * the framework instead of being silently dropped.
     */
    public static class TextFileTestMapper extends MapReduceBase implements Mapper {
        /** Number of leading fields read from every input line. */
        private static final int FIELD_COUNT = 7;

        /** Counter group used to report skipped lines. */
        private static final String COUNTER_GROUP = "Text2FormatMR";

        /** Counter name used to report skipped lines. */
        private static final String MALFORMED_COUNTER = "MalformedLines";

        public void configure(JobConf job) {
        }

        /**
         * Converts one text line into a record and emits it.
         *
         * @param key      ignored
         * @param value    the input line; must be a {@link Text}
         * @param output   collector receiving (LongWritable, Record) pairs
         * @param reporter used only to count skipped lines; may be null
         * @throws IOException if the collector fails to accept the record
         */
        public void map(Object key, Object value, OutputCollector output, Reporter reporter) throws IOException {
            String content = ((Text) value).toString();

            String[] values = content.split(",");

            // Too few fields can never produce a complete record; skip up front
            // rather than relying on an ArrayIndexOutOfBoundsException.
            if (values.length < FIELD_COUNT) {
                countMalformed(reporter);
                return;
            }

            Record record;
            try {
                record = new Record((short) values.length);
                record.addValue(new FieldValue(Byte.valueOf(values[0]), (short) 0));
                record.addValue(new FieldValue(Short.valueOf(values[1]), (short) 1));
                record.addValue(new FieldValue(Integer.valueOf(values[2]), (short) 2));
                record.addValue(new FieldValue(Long.valueOf(values[3]), (short) 3));
                record.addValue(new FieldValue(Float.valueOf(values[4]), (short) 4));
                record.addValue(new FieldValue(Double.valueOf(values[5]), (short) 5));
                record.addValue(new FieldValue(values[6], (short) 6));
            } catch (Exception e) {
                // Only record construction is guarded here: an unparsable
                // field makes the line unusable, so it is skipped and counted.
                countMalformed(reporter);
                return;
            }

            LongWritable lw = new LongWritable((long) (Math.random() * 100));

            // Deliberately outside the try block so that an IOException from
            // the collector fails the task instead of being swallowed.
            output.collect(lw, record);
        }

        public void close() {

        }

        /** Increments the skipped-line counter when a reporter is available. */
        private static void countMalformed(Reporter reporter) {
            if (reporter != null) {
                reporter.incrCounter(COUNTER_GROUP, MALFORMED_COUNTER, 1);
            }
        }

    }

    /**
     * Reducer that forwards every value it receives, replacing the incoming
     * key with a constant {@code LongWritable(0)}.
     */
    public static class FormatFileTestReducer extends MapReduceBase implements Reducer {

        public void configure(JobConf job) {
            // nothing to configure
        }

        /**
         * Emits each incoming value unchanged under the key 0.
         *
         * @param key      incoming key; not used
         * @param values   values grouped under {@code key}; each is cast to {@link Writable}
         * @param output   collector receiving the forwarded values
         * @param reporter not used
         * @throws IOException if the collector fails
         */
        public void reduce(Object key, Iterator values, OutputCollector output, Reporter reporter)
                throws IOException {
            final LongWritable constantKey = new LongWritable(0);
            for (Iterator remaining = values; remaining.hasNext();) {
                Writable current = (Writable) remaining.next();
                output.collect(constantKey, current);
            }
        }

        public void close() {
            // nothing to release
        }
    }

    /**
     * Prints up to {@code lineNum} record keys from the first split of a
     * format-storage input to standard output.
     *
     * @param conf      job configuration; its "mapred.input.dir" entry is
     *                  overwritten with {@code inputPath}
     * @param inputPath input location handed to {@link FormatStorageInputFormat}
     * @param lineNum   maximum number of records to print; zero or a negative
     *                  value prints nothing
     * @return -1 when the input yields no splits, 0 otherwise
     * @throws Exception if computing the splits or reading records fails
     */
    @SuppressWarnings("unchecked")
    public static int readFormatFile(JobConf conf, String inputPath, int lineNum) throws Exception {

        conf.set("mapred.input.dir", inputPath);

        InputFormat inputFormat = new FormatStorageInputFormat();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return -1;
        }

        RecordReader<WritableComparable, Writable> currRecReader = inputFormat.getRecordReader(inputSplits[0], conf,
                Reporter.NULL);
        try {
            WritableComparable key = currRecReader.createKey();
            Writable value = currRecReader.createValue();

            int num = 0;
            // Stop at the requested count or at end of split, whichever comes first.
            while (num < lineNum && currRecReader.next(key, value)) {
                // toString() works for any key type; no cast to Text is needed.
                System.out.println(key.toString());
                num++;
            }
        } finally {
            // Always release the reader, even if reading fails part-way.
            currRecReader.close();
        }

        return 0;
    }

    /**
     * Installs a seven-field schema on {@code head}: byte, short, int, long,
     * float, double and string at field indexes 0 through 6, in that order.
     * The string field is declared with length 0, and the head's var setting
     * is set to {@code ConstVar.VarFlag} (presumably marking records as
     * variable-length -- confirm against {@code Head}).
     *
     * @param head the head to populate; its field map is replaced
     */
    public static void initHead(Head head) {
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
        fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
        fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
        fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));

        head.setFieldMap(fieldMap);

        head.setVar(ConstVar.VarFlag);
    }

    /**
     * Prints the number of splits computed by {@link FormatStorageInputFormat}
     * for {@code conf}, followed by one line per split with its offset,
     * length, configured input path and begin/end line.
     *
     * @param conf job configuration describing the input
     * @throws IOException if the splits cannot be computed
     */
    @SuppressWarnings({ "unchecked", "deprecation" })
    public static void showSplits(JobConf conf) throws IOException {
        FormatStorageInputFormat inputFormat = new FormatStorageInputFormat();
        InputSplit[] splits = inputFormat.getSplits(conf, 1);
        int size = splits.length;
        System.out.println("getSplits return size:" + size);

        // The configured path is the same for every split; read it once.
        String inputPath = conf.get(ConstVar.InputPath);
        for (int i = 0; i < size; i++) {
            FormatStorageSplit split = (FormatStorageSplit) splits[i];
            // print(), not printf(): the text contains runtime data (the path),
            // and a '%' in it must not be interpreted as a format specifier.
            System.out.print("split:" + i + "offset:" + split.getStart() + "len:" + split.getLength() + "path:"
                    + inputPath + "beginLine:" + split.getBeginLine() + "endLine:"
                    + split.getEndLine() + "\n");
        }
    }

    /**
     * Command-line entry point: converts the comma-separated text input at
     * {@code args[0]} into format-storage output at {@code args[1]} using one
     * map task and four reduce tasks, printing progress while the job runs.
     *
     * Any existing content at the output path is deleted before the job is
     * submitted. The process exits with 0 when the job succeeds, 1 when it
     * fails or the wait is interrupted, and -1 on a usage error.
     *
     * @param args input path followed by output path
     * @throws Exception if the job cannot be configured, submitted or polled
     */
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {

        if (args.length != 2) {
            System.out.println("Text2FormatStorageMR <input> <output>");
            System.exit(-1);
        }

        // Use this class so the job jar is located from the class that
        // actually contains the mapper and reducer.
        JobConf conf = new JobConf(Text2FormatStorageMR.class);

        conf.setJobName("Text2FormatMR");

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(4);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setMapperClass(TextFileTestMapper.class);
        conf.setReducerClass(FormatFileTestReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(FormatStorageOutputFormat.class);
        conf.set("mapred.output.compress", "false");

        Head head = new Head();
        initHead(head);

        head.toJobConf(conf);

        FileInputFormat.setInputPaths(conf, args[0]);
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        // Remove any previous output so the job does not fail on an existing directory.
        FileSystem fs = outputPath.getFileSystem(conf);
        fs.delete(outputPath, true);

        JobClient jc = new JobClient(conf);
        RunningJob rj = jc.submitJob(conf);

        String lastReport = "";
        // HH = 24-hour clock; the 12-hour "hh" would be ambiguous without an AM/PM marker.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;
        boolean interrupted = false;
        while (!rj.isComplete()) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Preserve the interrupt and stop polling; the submitted job
                // itself is left untouched.
                Thread.currentThread().interrupt();
                interrupted = true;
                break;
            }

            int mapProgress = Math.round(rj.mapProgress() * 100);
            int reduceProgress = Math.round(rj.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            // Print when progress changed, or at least once per maxReportInterval.
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.out.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }

        if (interrupted) {
            LOG.warn("Interrupted while waiting for the job to finish; it may still be running");
            System.exit(1);
        }

        // Report the real outcome to the caller instead of always exiting 0.
        System.exit(rj.isSuccessful() ? 0 : 1);

    }

}