org.apache.hadoop.mapred.nativetask.kvtest.TestInputFile.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.mapred.nativetask.kvtest.TestInputFile.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred.nativetask.kvtest;

import java.io.IOException;
import java.util.HashMap;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.nativetask.testutil.BytesFactory;
import org.apache.hadoop.mapred.nativetask.testutil.ScenarioConfiguration;
import org.apache.hadoop.mapred.nativetask.testutil.TestConstants;

public class TestInputFile {
    private static Log LOG = LogFactory.getLog(TestInputFile.class);

    public static class KVSizeScope {
        private static final int DefaultMinNum = 1;
        private static final int DefaultMaxNum = 64;

        public int minBytesNum;
        public int maxBytesNum;

        public KVSizeScope() {
            this.minBytesNum = DefaultMinNum;
            this.maxBytesNum = DefaultMaxNum;
        }

        public KVSizeScope(int min, int max) {
            this.minBytesNum = min;
            this.maxBytesNum = max;
        }
    }

    private static HashMap<String, KVSizeScope> map = new HashMap<String, KVSizeScope>();

    private byte[] databuf = null;
    private final String keyClsName, valueClsName;
    private int filesize = 0;
    private int keyMaxBytesNum, keyMinBytesNum;
    private int valueMaxBytesNum, valueMinBytesNum;
    private SequenceFile.Writer writer = null;
    Random r = new Random();
    public static final int DATABUFSIZE = 1 << 22; // 4M

    private enum State {
        KEY, VALUE
    };

    static {
        map.put(BooleanWritable.class.getName(), new KVSizeScope(1, 1));
        map.put(DoubleWritable.class.getName(), new KVSizeScope(8, 8));
        map.put(FloatWritable.class.getName(), new KVSizeScope(4, 4));
        map.put(VLongWritable.class.getName(), new KVSizeScope(8, 8));
        map.put(ByteWritable.class.getName(), new KVSizeScope(1, 1));
        map.put(LongWritable.class.getName(), new KVSizeScope(8, 8));
        map.put(VIntWritable.class.getName(), new KVSizeScope(4, 4));
        map.put(IntWritable.class.getName(), new KVSizeScope(4, 4));
    }

    public TestInputFile(int filesize, String keytype, String valuetype, Configuration conf) throws Exception {
        this.filesize = filesize;
        this.databuf = new byte[DATABUFSIZE];
        this.keyClsName = keytype;
        this.valueClsName = valuetype;
        final int defaultMinBytes = conf.getInt(TestConstants.NATIVETASK_KVSIZE_MIN, 1);
        final int defaultMaxBytes = conf.getInt(TestConstants.NATIVETASK_KVSIZE_MAX, 64);

        if (map.get(keytype) != null) {
            keyMinBytesNum = map.get(keytype).minBytesNum;
            keyMaxBytesNum = map.get(keytype).maxBytesNum;
        } else {
            keyMinBytesNum = defaultMinBytes;
            keyMaxBytesNum = defaultMaxBytes;
        }

        if (map.get(valuetype) != null) {
            valueMinBytesNum = map.get(valuetype).minBytesNum;
            valueMaxBytesNum = map.get(valuetype).maxBytesNum;
        } else {
            valueMinBytesNum = defaultMinBytes;
            valueMaxBytesNum = defaultMaxBytes;
        }
    }

    public void createSequenceTestFile(String filepath) throws Exception {
        int FULL_BYTE_SPACE = 256;
        createSequenceTestFile(filepath, FULL_BYTE_SPACE);
    }

    public void createSequenceTestFile(String filepath, int base) throws Exception {
        createSequenceTestFile(filepath, base, (byte) 0);
    }

    public void createSequenceTestFile(String filepath, int base, byte start) throws Exception {
        LOG.info("creating file " + filepath + "(" + filesize + " bytes)");
        LOG.info(keyClsName + " " + valueClsName);
        Class<?> tmpkeycls, tmpvaluecls;
        try {
            tmpkeycls = Class.forName(keyClsName);
        } catch (final ClassNotFoundException e) {
            throw new Exception("key class not found: ", e);
        }
        try {
            tmpvaluecls = Class.forName(valueClsName);
        } catch (final ClassNotFoundException e) {
            throw new Exception("key class not found: ", e);
        }
        try {
            final Path outputfilepath = new Path(filepath);
            final ScenarioConfiguration conf = new ScenarioConfiguration();
            writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(outputfilepath),
                    SequenceFile.Writer.keyClass(tmpkeycls), SequenceFile.Writer.valueClass(tmpvaluecls));
        } catch (final Exception e) {
            e.printStackTrace();
        }

        int tmpfilesize = this.filesize;
        while (tmpfilesize > DATABUFSIZE) {
            nextRandomBytes(databuf, base, start);
            final int size = flushBuf(DATABUFSIZE);
            tmpfilesize -= size;
        }
        nextRandomBytes(databuf, base, start);
        flushBuf(tmpfilesize);

        if (writer != null) {
            IOUtils.closeStream(writer);
        } else {
            throw new Exception("no writer to create sequenceTestFile!");
        }
    }

    private void nextRandomBytes(byte[] buf, int base) {
        nextRandomBytes(buf, base, (byte) 0);
    }

    private void nextRandomBytes(byte[] buf, int base, byte start) {
        r.nextBytes(buf);
        for (int i = 0; i < buf.length; i++) {
            buf[i] = (byte) ((buf[i] & 0xFF) % base + start);
        }
    }

    private int flushBuf(int buflen) throws Exception {
        final Random r = new Random();
        int keybytesnum = 0;
        int valuebytesnum = 0;
        int offset = 0;

        Writable keyWritable = BytesFactory.newObject(null, keyClsName);
        Writable valWritable = BytesFactory.newObject(null, valueClsName);

        while (offset < buflen) {
            final int remains = buflen - offset;
            keybytesnum = keyMaxBytesNum;
            if (keyMaxBytesNum != keyMinBytesNum) {
                keybytesnum = keyMinBytesNum + r.nextInt(keyMaxBytesNum - keyMinBytesNum);
            }

            valuebytesnum = valueMaxBytesNum;
            if (valueMaxBytesNum != valueMinBytesNum) {
                valuebytesnum = valueMinBytesNum + r.nextInt(valueMaxBytesNum - valueMinBytesNum);
            }

            if (keybytesnum + valuebytesnum > remains) {
                break;
            }

            final byte[] key = new byte[keybytesnum];
            final byte[] value = new byte[valuebytesnum];

            System.arraycopy(databuf, offset, key, 0, keybytesnum);
            offset += keybytesnum;

            System.arraycopy(databuf, offset, value, 0, valuebytesnum);
            offset += valuebytesnum;

            BytesFactory.updateObject(keyWritable, key);
            BytesFactory.updateObject(valWritable, value);

            try {
                writer.append(keyWritable, valWritable);
            } catch (final IOException e) {
                e.printStackTrace();
                throw new Exception("sequence file create failed", e);
            }
        }
        return offset;
    }

}