cascading.tuple.hadoop.HadoopSerializationPlatformTest.java Source code

Java tutorial

Introduction

Here is the source code for cascading.tuple.hadoop.HadoopSerializationPlatformTest.java

Source

/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.tuple.hadoop;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import cascading.PlatformTestCase;
import cascading.tuple.Tuple;
import cascading.tuple.hadoop.io.HadoopTupleInputStream;
import cascading.tuple.hadoop.io.HadoopTupleOutputStream;
import cascading.tuple.io.TupleInputStream;
import cascading.tuple.io.TupleOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.WritableSerialization;
import org.junit.Test;

/**
 * Verifies that tuples written with {@link HadoopTupleOutputStream} can be read back with
 * {@link HadoopTupleInputStream} when the {@link TupleSerialization} is built from a
 * configuration that declares custom Hadoop serializations and serialization tokens.
 */
public class HadoopSerializationPlatformTest extends PlatformTestCase {
    /**
     * Number of tuples written and then read back. The value itself is arbitrary, but the
     * expected file size asserted in the test was measured for this count.
     */
    private static final int TUPLE_COUNT = 501;

    public HadoopSerializationPlatformTest() {
    }

    /**
     * Writes {@link #TUPLE_COUNT} tuples holding primitives, a nested tuple and several Hadoop
     * writables to a file, checks the resulting file size, then reads the tuples back and
     * checks the order, element types and the round-tripped {@link BytesWritable} payload.
     *
     * @throws IOException if the file cannot be written or read, or UTF-8 is unavailable
     */
    @Test
    public void testInputOutputSerialization() throws IOException {
        long time = System.currentTimeMillis();

        Configuration jobConf = new Configuration();

        jobConf.set("io.serializations",
                TestSerialization.class.getName() + "," + WritableSerialization.class.getName()); // disable/replace WritableSerialization class
        jobConf.set("cascading.serialization.tokens",
                "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName()); // not using Text, just testing parsing

        TupleSerialization tupleSerialization = new TupleSerialization(jobConf);

        File directory = new File(getOutputPath("serialization"));

        // mkdirs() returns false when the directory already exists, so accept that case too
        assertTrue("unable to create output directory", directory.mkdirs() || directory.isDirectory());

        File file = new File(directory, "test.bytes");

        TupleOutputStream output = new HadoopTupleOutputStream(new FileOutputStream(file, false),
                tupleSerialization.getElementWriter());

        // close in finally so the file handle is released even if a write fails
        try {
            for (int i = 0; i < TUPLE_COUNT; i++) {
                String aString = "string number " + i;
                double random = Math.random();

                output.writeTuple(new Tuple(i, aString, random, new TestText(aString),
                        new Tuple("inner tuple", new BytesWritable("some string".getBytes("UTF-8"))),
                        new BytesWritable(Integer.toString(i).getBytes("UTF-8")), new BooleanWritable(false)));
            }
        } finally {
            output.close();
        }

        assertEquals("wrong size", 89967L, file.length()); // just makes sure the file size doesn't change from expected

        TupleInputStream input = new HadoopTupleInputStream(new FileInputStream(file),
                tupleSerialization.getElementReader());

        // close in finally so a failed assertion does not leak the file handle
        try {
            int previous = -1;

            for (int i = 0; i < TUPLE_COUNT; i++) {
                Tuple tuple = input.readTuple();
                int value = tuple.getInteger(0);

                // the first field was written as the loop index, so values must increase by one
                assertEquals("wrong diff", 1, value - previous);
                assertTrue("wrong type", tuple.getObject(3) instanceof TestText);
                assertTrue("wrong type", tuple.getObject(4) instanceof Tuple);
                assertTrue("wrong type", tuple.getObject(5) instanceof BytesWritable);

                // getBytes() exposes the backing buffer, which may be longer than the payload;
                // getLength() is the number of valid bytes in it
                BytesWritable writable = (BytesWritable) tuple.getObject(5);
                String string = new String(writable.getBytes(), 0, writable.getLength(), "UTF-8");

                assertEquals("wrong value", i, Integer.parseInt(string));
                assertTrue("wrong type", tuple.getObject(6) instanceof BooleanWritable);

                previous = value;
            }
        } finally {
            input.close();
        }

        System.out.println("time = " + (System.currentTimeMillis() - time));
    }
}