// Java tutorial
/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cascading.tuple.hadoop;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import cascading.PlatformTestCase;
import cascading.tuple.Tuple;
import cascading.tuple.hadoop.io.HadoopTupleInputStream;
import cascading.tuple.hadoop.io.HadoopTupleOutputStream;
import cascading.tuple.io.TupleInputStream;
import cascading.tuple.io.TupleOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.WritableSerialization;
import org.junit.Test;

/**
 * Round-trip test for Cascading's Hadoop tuple serialization: writes a fixed
 * number of tuples containing primitives, nested tuples, and Hadoop
 * {@link org.apache.hadoop.io.Writable} types to a file, then reads them back
 * and verifies types and values.
 */
public class HadoopSerializationPlatformTest extends PlatformTestCase
  {
  public HadoopSerializationPlatformTest()
    {
    }

  /**
   * Serializes 501 tuples to disk and deserializes them, asserting that the
   * on-disk size matches the expected byte count and that each field
   * round-trips with the correct type and value.
   *
   * @throws IOException if the serialization file cannot be written or read
   */
  @Test
  public void testInputOutputSerialization() throws IOException
    {
    long time = System.currentTimeMillis();

    Configuration jobConf = new Configuration();

    // register the custom serialization ahead of WritableSerialization so it wins for TestText
    jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class
    jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing

    TupleSerialization tupleSerialization = new TupleSerialization( jobConf );

    File file = new File( getOutputPath( "serialization" ) );

    file.mkdirs();

    file = new File( file, "/test.bytes" );

    // try-with-resources guarantees the stream is closed even if an assertion fails mid-test
    try( TupleOutputStream output = new HadoopTupleOutputStream( new FileOutputStream( file, false ), tupleSerialization.getElementWriter() ) )
      {
      for( int i = 0; i < 501; i++ ) // 501 is arbitrary
        {
        String aString = "string number " + i;
        double random = Math.random();

        // UTF-8 is specified explicitly so the serialized byte count is platform independent
        output.writeTuple( new Tuple( i, aString, random, new TestText( aString ), new Tuple( "inner tuple", new BytesWritable( "some string".getBytes( StandardCharsets.UTF_8 ) ) ), new BytesWritable( Integer.toString( i ).getBytes( StandardCharsets.UTF_8 ) ), new BooleanWritable( false ) ) );
        }
      }

    assertEquals( "wrong size", 89967L, file.length() ); // just makes sure the file size doesnt change from expected

    try( TupleInputStream input = new HadoopTupleInputStream( new FileInputStream( file ), tupleSerialization.getElementReader() ) )
      {
      int k = -1;

      for( int i = 0; i < 501; i++ )
        {
        Tuple tuple = input.readTuple();
        int value = tuple.getInteger( 0 );

        assertTrue( "wrong diff", value - k == 1 ); // values must be strictly sequential
        assertTrue( "wrong type", tuple.getObject( 3 ) instanceof TestText );
        assertTrue( "wrong type", tuple.getObject( 4 ) instanceof Tuple );
        assertTrue( "wrong type", tuple.getObject( 5 ) instanceof BytesWritable );

        BytesWritable bytesWritable = (BytesWritable) tuple.getObject( 5 );

        // getBytes() returns the backing array, which may be larger than the valid data;
        // getLength() gives the number of valid bytes, so no trailing-byte guesswork is needed
        String string = new String( bytesWritable.getBytes(), 0, bytesWritable.getLength(), StandardCharsets.UTF_8 );

        assertEquals( "wrong value", i, Integer.parseInt( string ) ); // expected value comes first in JUnit
        assertTrue( "wrong type", tuple.getObject( 6 ) instanceof BooleanWritable );

        k = value;
        }
      }

    System.out.println( "time = " + ( System.currentTimeMillis() - time ) );
    }
  }