org.apache.hadoop.io.TestUTF8.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.io.TestUTF8.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.io;

import junit.framework.TestCase;

import java.util.Arrays;
import java.util.Random;

import org.apache.commons.lang.RandomStringUtils;

/** Unit tests for UTF8. */
@SuppressWarnings("deprecation")
public class TestUTF8 extends TestCase {
    public TestUTF8(String name) {
        super(name);
    }

    private static final Random RANDOM = new Random();

    public static String getTestString() throws Exception {
        StringBuffer buffer = new StringBuffer();
        int length = RANDOM.nextInt(1010);
        for (int i = 0; i < length; i++) {
            buffer.append((char) (RANDOM.nextInt(Character.MAX_VALUE)));
        }
        return buffer.toString();
    }

    public void testWritable() throws Exception {
        for (int i = 0; i < 10000; i++) {
            TestWritable.testWritable(new UTF8(getTestString()));
        }
    }

    public void testGetBytes() throws Exception {
        for (int i = 0; i < 10000; i++) {

            // generate a random string
            String before = getTestString();

            // check its utf8
            assertEquals(before, new String(UTF8.getBytes(before), "UTF-8"));
        }
    }

    public void testIO() throws Exception {
        DataOutputBuffer out = new DataOutputBuffer();
        DataInputBuffer in = new DataInputBuffer();

        for (int i = 0; i < 10000; i++) {
            // generate a random string
            String before = getTestString();

            // write it
            out.reset();
            UTF8.writeString(out, before);

            // test that it reads correctly
            in.reset(out.getData(), out.getLength());
            String after = UTF8.readString(in);
            assertTrue(before.equals(after));

            // test that it reads correctly with DataInput
            in.reset(out.getData(), out.getLength());
            String after2 = in.readUTF();
            assertTrue(before.equals(after2));

            // test that it is compatible with Java's other decoder
            String after3 = new String(out.getData(), 2, out.getLength() - 2, "UTF-8");
            assertTrue(before.equals(after3));

        }

    }

    public void testOpt() throws Exception {
        testOpt(false);
        testOpt(true);
    }

    /**
     * Test optimized writes
     * @param utf - true for unicode, false for ascii
     */
    private void testOpt(boolean unicode) throws Exception {
        DataOutputBuffer outOpt = new DataOutputBuffer();
        DataOutputBuffer outReg = new DataOutputBuffer();

        DataInputBuffer in = new DataInputBuffer();

        for (int i = 0; i < 10000; i++) {
            // generate a random string
            String before = null;
            if (unicode) {
                before = RandomStringUtils.random(1010);
            } else {
                before = RandomStringUtils.randomAscii(1010);
            }
            // write it
            outOpt.reset();
            outReg.reset();
            UTF8.writeStringOpt(outOpt, before);
            UTF8.writeString(outReg, before);

            // test that it reads correctly (opt)
            in.reset(outOpt.getData(), outOpt.getLength());
            String afterOpt = UTF8.readString(in);
            assertTrue(before.equals(afterOpt));

            // test that it reads correctly (reg)
            in.reset(outReg.getData(), outReg.getLength());
            String afterReg = UTF8.readString(in);
            assertTrue(before.equals(afterReg));

            // test setters
            UTF8 optUTF8 = new UTF8();
            optUTF8.set(before, true);

            UTF8 regUTF8 = new UTF8();
            regUTF8.set(before, false);

            assertEquals(optUTF8.toString(), regUTF8.toString());
            assertTrue(Arrays.equals(UTF8.getBytes(before), optUTF8.getBytes()));
            assertTrue(Arrays.equals(UTF8.getBytes(before), regUTF8.getBytes()));
        }

    }

    public void testNullEncoding() throws Exception {
        String s = new String(new char[] { 0 });

        DataOutputBuffer dob = new DataOutputBuffer();
        new UTF8(s).write(dob);

        assertEquals(s, new String(dob.getData(), 2, dob.getLength() - 2, "UTF-8"));
    }

}