Java tutorial: Apache Tajo's TestDirectRawFile
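The listing below is Apache Tajo's TestDirectRawFile, a parameterized JUnit test for the DRAW (direct raw) storage format. It fills an OffHeapRowBlock with rows covering every supported column type, writes the block to a file with DirectRawFileWriter, and reads it back with DirectRawFileScanner, running each test once against the local file system and once against an embedded MiniDFSCluster.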
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.storage.raw;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.tajo.BuiltinStorages;
import org.apache.tajo.catalog.CatalogUtil;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.datum.ProtobufDatum;
import org.apache.tajo.rpc.protocolrecords.PrimitiveProtos;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.storage.fragment.FileFragment;
import org.apache.tajo.storage.rawfile.DirectRawFileScanner;
import org.apache.tajo.storage.rawfile.DirectRawFileWriter;
import org.apache.tajo.tuple.offheap.OffHeapRowBlock;
import org.apache.tajo.tuple.offheap.RowWriter;
import org.apache.tajo.unit.StorageUnit;
import org.apache.tajo.util.FileUtil;
import org.apache.tajo.util.ProtoUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.UUID;

import static org.junit.Assert.*;

@RunWith(Parameterized.class)
public class TestDirectRawFile {
  private static final Log LOG = LogFactory.getLog(TestDirectRawFile.class);

  public static String UNICODE_FIELD_PREFIX = "abc__";
  public static Schema schema;

  private static final String TEST_PATH = "target/test-data/TestDirectRawFile";
  private static MiniDFSCluster cluster;
  private static FileSystem dfs;
  private static FileSystem localFs;

  private TajoConf tajoConf;
  private Path testDir;

  // Runs every test twice: against HDFS (false) and against the local file system (true).
  @Parameterized.Parameters
  public static Collection<Object[]> generateParameters() throws IOException {
    return Arrays.asList(new Object[][] {
        { false },
        { true }
    });
  }

  public TestDirectRawFile(boolean isLocal) throws IOException {
    FileSystem fs;
    if (isLocal) {
      fs = localFs;
    } else {
      fs = dfs;
    }
    this.tajoConf = new TajoConf(fs.getConf());
    this.testDir = getTestDir(fs, TEST_PATH);
  }

  @BeforeClass
  public static void setUpClass() throws IOException, InterruptedException {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();

    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, false);

    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(new HdfsConfiguration(conf));
    builder.numDataNodes(1);
    builder.format(true);
    builder.manageNameDfsDirs(true);
    builder.manageDataDfsDirs(true);
    builder.waitSafeMode(true);
    cluster = builder.build();
    cluster.waitClusterUp();

    dfs = cluster.getFileSystem();
    localFs = FileSystem.getLocal(new TajoConf());
  }

  @AfterClass
  public static void tearDownClass() throws InterruptedException {
    cluster.shutdown(true);
  }

  public Path getTestDir(FileSystem fs, String dir) throws IOException {
    Path path = new Path(dir);
    if (fs.exists(path)) {
      fs.delete(path, true);
    }
    fs.mkdirs(path);
    return fs.makeQualified(path);
  }

  static {
    schema = new Schema();
    schema.addColumn("col0", TajoDataTypes.Type.BOOLEAN);
    schema.addColumn("col1", TajoDataTypes.Type.INT2);
    schema.addColumn("col2", TajoDataTypes.Type.INT4);
    schema.addColumn("col3", TajoDataTypes.Type.INT8);
    schema.addColumn("col4", TajoDataTypes.Type.FLOAT4);
    schema.addColumn("col5", TajoDataTypes.Type.FLOAT8);
    schema.addColumn("col6", TajoDataTypes.Type.TEXT);
    schema.addColumn("col7", TajoDataTypes.Type.TIMESTAMP);
    schema.addColumn("col8", TajoDataTypes.Type.DATE);
    schema.addColumn("col9", TajoDataTypes.Type.TIME);
    schema.addColumn("col10", TajoDataTypes.Type.INTERVAL);
    schema.addColumn("col11", TajoDataTypes.Type.INET4);
    schema.addColumn("col12", CatalogUtil.newDataType(TajoDataTypes.Type.PROTOBUF,
        PrimitiveProtos.StringProto.class.getName()));
  }

  public FileStatus writeRowBlock(TajoConf conf, TableMeta meta, OffHeapRowBlock rowBlock, Path outputFile)
      throws IOException {
    DirectRawFileWriter writer = new DirectRawFileWriter(conf, null, schema, meta, outputFile);
    writer.init();
    writer.writeRowBlock(rowBlock);
    writer.close();

    FileStatus status = outputFile.getFileSystem(conf).getFileStatus(outputFile);
    assertTrue(status.getLen() > 0);
    LOG.info("Written file size: " + FileUtil.humanReadableByteCount(status.getLen(), false));
    return status;
  }

  public FileStatus writeRowBlock(TajoConf conf, TableMeta meta, OffHeapRowBlock rowBlock) throws IOException {
    Path outputDir = new Path(testDir, UUID.randomUUID() + "");
    outputDir.getFileSystem(conf).mkdirs(outputDir);
    Path outputFile = new Path(outputDir, "output.draw");
    return writeRowBlock(conf, meta, rowBlock, outputFile);
  }

  @Test
  public void testRWForAllTypesWithNextTuple() throws IOException {
    int rowNum = 10000;

    OffHeapRowBlock rowBlock = createRowBlock(rowNum);
    TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.DRAW);
    FileStatus outputFile = writeRowBlock(tajoConf, meta, rowBlock);
    rowBlock.release();

    FileFragment fragment = new FileFragment("testRWForAllTypesWithNextTuple",
        outputFile.getPath(), 0, outputFile.getLen());
    DirectRawFileScanner reader = new DirectRawFileScanner(tajoConf, schema, meta, fragment);
    reader.init();

    long readStart = System.currentTimeMillis();
    int j = 0;
    Tuple tuple;
    while ((tuple = reader.next()) != null) {
      validateTupleResult(j, tuple);
      j++;
    }
    LOG.info("Total read rows: " + j);
    long readEnd = System.currentTimeMillis();
    LOG.info("reading takes " + (readEnd - readStart) + " msec");
    reader.close();

    assertEquals(rowNum, j);
  }

  @Test
  public void testRepeatedScan() throws IOException {
    int rowNum = 2;

    OffHeapRowBlock rowBlock = createRowBlock(rowNum);
    TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.DRAW);
    FileStatus outputFile = writeRowBlock(tajoConf, meta, rowBlock);
    rowBlock.release();

    FileFragment fragment = new FileFragment("testRepeatedScan",
        outputFile.getPath(), 0, outputFile.getLen());
    DirectRawFileScanner reader = new DirectRawFileScanner(tajoConf, schema, meta, fragment);
    reader.init();

    int j = 0;
    while (reader.next() != null) {
      j++;
    }
    assertEquals(rowNum, j);

    // Once the scanner is exhausted, next() must keep returning null.
    for (int i = 0; i < 5; i++) {
      assertNull(reader.next());
    }
    reader.close();
  }

  @Test
  public void testReset() throws IOException {
    int rowNum = 2;

    OffHeapRowBlock rowBlock = createRowBlock(rowNum);
    TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.DRAW);
    FileStatus outputFile = writeRowBlock(tajoConf, meta, rowBlock);
    rowBlock.release();

    FileFragment fragment = new FileFragment("testReset",
        outputFile.getPath(), 0, outputFile.getLen());
    DirectRawFileScanner reader = new DirectRawFileScanner(tajoConf, schema, meta, fragment);
    reader.init();

    int j = 0;
    while (reader.next() != null) {
      j++;
    }
    assertEquals(rowNum, j);
    for (int i = 0; i < 5; i++) {
      assertNull(reader.next());
    }

    // reset() must rewind the scanner so the same rows can be read again.
    reader.reset();
    j = 0;
    while (reader.next() != null) {
      j++;
    }
    assertEquals(rowNum, j);
    for (int i = 0; i < 5; i++) {
      assertNull(reader.next());
    }
    reader.close();
  }

  public static OffHeapRowBlock createRowBlock(int rowNum) {
    long allocateStart = System.currentTimeMillis();
    OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, StorageUnit.MB * 8);
    long allocatedEnd = System.currentTimeMillis();
    LOG.info(FileUtil.humanReadableByteCount(rowBlock.size(), true) + " bytes allocated "
        + (allocatedEnd - allocateStart) + " msec");

    long writeStart = System.currentTimeMillis();
    for (int i = 0; i < rowNum; i++) {
      fillRow(i, rowBlock.getWriter());
    }
    long writeEnd = System.currentTimeMillis();
    LOG.info("writing takes " + (writeEnd - writeStart) + " msec");

    return rowBlock;
  }

  public static void fillRow(int i, RowWriter builder) {
    builder.startRow();
    builder.putBool(i % 1 == 0); // 0 (always true)
    builder.putInt2((short) 1); // 1
    builder.putInt4(i); // 2
    builder.putInt8(i); // 3
    builder.putFloat4(i); // 4
    builder.putFloat8(i); // 5
    builder.putText((UNICODE_FIELD_PREFIX + i).getBytes()); // 6
    builder.putTimestamp(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + i); // 7
    builder.putDate(DatumFactory.createDate("2014-04-16").asInt4() + i); // 8
    builder.putTime(DatumFactory.createTime("08:48:00").asInt8() + i); // 9
    builder.putInterval(DatumFactory.createInterval((i + 1) + " hours")); // 10
    builder.putInet4(DatumFactory.createInet4("192.168.0.1").asInt4() + i); // 11
    builder.putProtoDatum(new ProtobufDatum(ProtoUtil.convertString(i + ""))); // 12
    builder.endRow();
  }

  public static void validateTupleResult(int j, Tuple t) {
    assertTrue((j % 1 == 0) == t.getBool(0));
    assertTrue(1 == t.getInt2(1));
    assertEquals(j, t.getInt4(2));
    assertEquals(j, t.getInt8(3));
    assertTrue(j == t.getFloat4(4));
    assertTrue(j == t.getFloat8(5));
    assertEquals(UNICODE_FIELD_PREFIX + j, t.getText(6));
    assertEquals(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + (long) j, t.getInt8(7));
    assertEquals(DatumFactory.createDate("2014-04-16").asInt4() + j, t.getInt4(8));
    assertEquals(DatumFactory.createTime("08:48:00").asInt8() + j, t.getInt8(9));
    assertEquals(DatumFactory.createInterval((j + 1) + " hours"), t.getInterval(10));
    assertEquals(DatumFactory.createInet4("192.168.0.1").asInt4() + j, t.getInt4(11));
    assertEquals(new ProtobufDatum(ProtoUtil.convertString(j + "")), t.getProtobufDatum(12));
  }

  // Writes row i, leaving column n null whenever i % n == 0 (column 0 is null only in the
  // first row; column 1 is always null because i % 1 == 0 holds for every i).
  public static void fillRowBlockWithNull(int i, RowWriter writer) {
    writer.startRow();

    if (i == 0) {
      writer.skipField();
    } else {
      writer.putBool(i % 1 == 0); // 0 (always true)
    }
    if (i % 1 == 0) {
      writer.skipField();
    } else {
      writer.putInt2((short) 1); // 1
    }
    if (i % 2 == 0) {
      writer.skipField();
    } else {
      writer.putInt4(i); // 2
    }
    if (i % 3 == 0) {
      writer.skipField();
    } else {
      writer.putInt8(i); // 3
    }
    if (i % 4 == 0) {
      writer.skipField();
    } else {
      writer.putFloat4(i); // 4
    }
    if (i % 5 == 0) {
      writer.skipField();
    } else {
      writer.putFloat8(i); // 5
    }
    if (i % 6 == 0) {
      writer.skipField();
    } else {
      writer.putText((UNICODE_FIELD_PREFIX + i).getBytes()); // 6
    }
    if (i % 7 == 0) {
      writer.skipField();
    } else {
      writer.putTimestamp(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + i); // 7
    }
    if (i % 8 == 0) {
      writer.skipField();
    } else {
      writer.putDate(DatumFactory.createDate("2014-04-16").asInt4() + i); // 8
    }
    if (i % 9 == 0) {
      writer.skipField();
    } else {
      writer.putTime(DatumFactory.createTime("08:48:00").asInt8() + i); // 9
    }
    if (i % 10 == 0) {
      writer.skipField();
    } else {
      writer.putInterval(DatumFactory.createInterval((i + 1) + " hours")); // 10
    }
    if (i % 11 == 0) {
      writer.skipField();
    } else {
      writer.putInet4(DatumFactory.createInet4("192.168.0.1").asInt4() + i); // 11
    }
    if (i % 12 == 0) {
      writer.skipField();
    } else {
      writer.putProtoDatum(new ProtobufDatum(ProtoUtil.convertString(i + ""))); // 12
    }

    writer.endRow();
  }

  // Mirror of fillRowBlockWithNull: a column written as null must read back as blank/null,
  // and every other column must match the value that was written.
  public static void validateNullity(int j, Tuple tuple) {
    if (j == 0) {
      assertTrue(tuple.isBlankOrNull(0));
    } else {
      assertTrue((j % 1 == 0) == tuple.getBool(0));
    }
    if (j % 1 == 0) {
      assertTrue(tuple.isBlankOrNull(1));
    } else {
      assertTrue(1 == tuple.getInt2(1));
    }
    if (j % 2 == 0) {
      assertTrue(tuple.isBlankOrNull(2));
    } else {
      assertEquals(j, tuple.getInt4(2));
    }
    if (j % 3 == 0) {
      assertTrue(tuple.isBlankOrNull(3));
    } else {
      assertEquals(j, tuple.getInt8(3));
    }
    if (j % 4 == 0) {
      assertTrue(tuple.isBlankOrNull(4));
    } else {
      assertTrue(j == tuple.getFloat4(4));
    }
    if (j % 5 == 0) {
      assertTrue(tuple.isBlankOrNull(5));
    } else {
      assertTrue(j == tuple.getFloat8(5));
    }
    if (j % 6 == 0) {
      assertTrue(tuple.isBlankOrNull(6));
    } else {
      assertEquals(UNICODE_FIELD_PREFIX + j, tuple.getText(6));
    }
    if (j % 7 == 0) {
      assertTrue(tuple.isBlankOrNull(7));
    } else {
      assertEquals(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + (long) j, tuple.getInt8(7));
    }
    if (j % 8 == 0) {
      assertTrue(tuple.isBlankOrNull(8));
    } else {
      assertEquals(DatumFactory.createDate("2014-04-16").asInt4() + j, tuple.getInt4(8));
    }
    if (j % 9 == 0) {
      assertTrue(tuple.isBlankOrNull(9));
    } else {
      assertEquals(DatumFactory.createTime("08:48:00").asInt8() + j, tuple.getInt8(9));
    }
    if (j % 10 == 0) {
      assertTrue(tuple.isBlankOrNull(10));
    } else {
      assertEquals(DatumFactory.createInterval((j + 1) + " hours"), tuple.getInterval(10));
    }
    if (j % 11 == 0) {
      assertTrue(tuple.isBlankOrNull(11));
    } else {
      assertEquals(DatumFactory.createInet4("192.168.0.1").asInt4() + j, tuple.getInt4(11));
    }
    if (j % 12 == 0) {
      assertTrue(tuple.isBlankOrNull(12));
    } else {
      assertEquals(new ProtobufDatum(ProtoUtil.convertString(j + "")), tuple.getProtobufDatum(12));
    }
  }
}
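Stripped of the test scaffolding, the write/read cycle exercised above reduces to a short pattern. The following is a minimal sketch using only the Tajo APIs that appear in the listing (DirectRawFileWriter, DirectRawFileScanner, FileFragment, and this class's createRowBlock helper); the output path and row count are illustrative, not taken from the test:

    // A minimal write/read round trip, assuming the APIs used in the test above.
    TajoConf conf = new TajoConf();
    TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.DRAW);
    Path file = new Path("/tmp/draw-example/output.draw"); // hypothetical location

    // Write: fill an off-heap row block and flush it to the file in one call.
    OffHeapRowBlock block = TestDirectRawFile.createRowBlock(100);
    DirectRawFileWriter writer = new DirectRawFileWriter(conf, null, TestDirectRawFile.schema, meta, file);
    writer.init();
    writer.writeRowBlock(block);
    writer.close();
    block.release(); // off-heap memory must be freed explicitly

    // Read: scan the whole file back as a single fragment.
    FileStatus status = file.getFileSystem(conf).getFileStatus(file);
    FileFragment fragment = new FileFragment("example", file, 0, status.getLen());
    DirectRawFileScanner scanner = new DirectRawFileScanner(conf, TestDirectRawFile.schema, meta, fragment);
    scanner.init();

    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
      // process each tuple here
    }
    scanner.close();

Note that a scan is addressed by a FileFragment (name, path, start offset, length) rather than by the bare path, which is what allows the same scanner API to read just a byte range of a larger file.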