Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.io.orc; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.PrintStream; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.orc.OrcFile.WriterContext; import org.apache.orc.impl.AcidStats; import org.apache.orc.impl.OrcAcidUtils; import org.apache.orc.impl.WriterImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.junit.Assert.*; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestName; public class TestFixAcidKeyIndex { public final static Logger LOG = LoggerFactory.getLogger(TestFixAcidKeyIndex.class); @Rule public TestName testCaseName = new TestName(); Path workDir = new Path(System.getProperty("test.tmp.dir", "target/tmp")); Configuration conf; Path testFilePath; FileSystem fs; @Before public void openFileSystem() throws Exception { conf = new Configuration(); fs = FileSystem.getLocal(conf); testFilePath = new Path(workDir, "TestFixAcidKeyIndex." + testCaseName.getMethodName() + ".orc"); fs.delete(testFilePath, false); } static abstract class TestKeyIndexBuilder extends OrcRecordUpdater.KeyIndexBuilder implements OrcFile.WriterCallback { public TestKeyIndexBuilder(String name) { super(name); } // Will be called before closing the ORC file to stop writing any additional information // to the acid key index. abstract void stopWritingKeyIndex(); } void createTestAcidFile(Path path, int numRows, TestKeyIndexBuilder indexBuilder) throws Exception { FileSystem fs = path.getFileSystem(conf); fs.delete(path, true); String typeStr = "struct<operation:int," + "originalTransaction:bigint,bucket:int,rowId:bigint," + "currentTransaction:bigint," + "row:struct<a:int,b:struct<c:int>,d:string>>"; TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeStr); Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf).fileSystem(fs).inspector(OrcStruct.createObjectInspector(typeInfo)) .compress(CompressionKind.NONE).callback(indexBuilder).stripeSize(128)); // Create ORC file with small stripe size so we can write multiple stripes. OrcStruct row = new OrcStruct(6); row.setFieldValue(0, new IntWritable(0)); row.setFieldValue(1, new LongWritable(1)); row.setFieldValue(2, new IntWritable(0)); LongWritable rowId = new LongWritable(); row.setFieldValue(3, rowId); row.setFieldValue(4, new LongWritable(1)); OrcStruct rowField = new OrcStruct(3); row.setFieldValue(5, rowField); IntWritable a = new IntWritable(); rowField.setFieldValue(0, a); OrcStruct b = new OrcStruct(1); rowField.setFieldValue(1, b); IntWritable c = new IntWritable(); b.setFieldValue(0, c); Text d = new Text(); rowField.setFieldValue(2, d); // Minimum 5000 rows per stripe. for (int r = 0; r < numRows; r++) { // row id rowId.set(r); // a a.set(r * 42); // b.c c.set(r * 10001); // d d.set(Integer.toHexString(r)); indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION, 1, 0, rowId.get()); writer.addRow(row); } indexBuilder.stopWritingKeyIndex(); writer.close(); } void runIndexCheck(Path orcFile, File outFile) throws Exception { // Run with --check-index and save the output to file so it can be checked. PrintStream origOut = System.out; FileOutputStream myOut = new FileOutputStream(outFile); System.setOut(new PrintStream(myOut)); String[] checkArgs = new String[] { "--check-only", orcFile.toString() }; FixAcidKeyIndex.main(checkArgs); System.out.flush(); System.setOut(origOut); } void checkValidKeyIndex(Path orcFile) throws Exception { String outputFilename = "fixAcidKeyIndex.out"; File outFile = new File(workDir.toString(), outputFilename); runIndexCheck(orcFile, outFile); // Check the output of FixAcidKeyIndex - it should indicate the index was valid. String outputAsString = FileUtils.readFileToString(outFile); System.out.println(outputAsString); assertTrue(outputAsString.contains("acid key index is valid")); } void checkInvalidKeyIndex(Path orcFile) throws Exception { String outputFilename = "fixAcidKeyIndex.out"; File outFile = new File(workDir.toString(), outputFilename); runIndexCheck(orcFile, outFile); // Check the output of FixAcidKeyIndex - it should indicate the index was invalid. String outputAsString = FileUtils.readFileToString(outFile); System.out.println(outputAsString); assertTrue(outputAsString.contains("acid key index is invalid")); } void runFixIndex(Path orcFile, File outFile) throws Exception { // Run with --recover and save the output to a file so it can be checked. PrintStream origOut = System.out; FileOutputStream myOut = new FileOutputStream(outFile); System.setOut(new PrintStream(myOut)); String[] checkArgs = new String[] { "--recover", orcFile.toString() }; FixAcidKeyIndex.main(checkArgs); System.out.flush(); System.setOut(origOut); } void fixInvalidIndex(Path orcFile) throws Exception { String outputFilename = "fixAcidKeyIndex.out"; File outFile = new File(workDir.toString(), outputFilename); runFixIndex(orcFile, outFile); // Check the output of FixAcidKeyIndex - it should indicate the index was fixed. String outputAsString = FileUtils.readFileToString(outFile); System.out.println(outputAsString); assertTrue(outputAsString.contains("Fixed acid key index")); } void fixValidIndex(Path orcFile) throws Exception { String outputFilename = "fixAcidKeyIndex.out"; File outFile = new File(workDir.toString(), outputFilename); runFixIndex(orcFile, outFile); // Check the output of FixAcidKeyIndex - it should indicate nothing required fixing. String outputAsString = FileUtils.readFileToString(outFile); System.out.println(outputAsString); assertTrue(outputAsString.contains("No need to recover")); } @Test public void testValidKeyIndex() throws Exception { // Try with 0 row file. createTestAcidFile(testFilePath, 0, new GoodKeyIndexBuilder()); checkValidKeyIndex(testFilePath); // Attempting to fix a valid - should not result in a new file. fixValidIndex(testFilePath); // Try single stripe createTestAcidFile(testFilePath, 100, new GoodKeyIndexBuilder()); checkValidKeyIndex(testFilePath); // Attempting to fix a valid - should not result in a new file. fixValidIndex(testFilePath); // Multiple stripes createTestAcidFile(testFilePath, 12000, new GoodKeyIndexBuilder()); checkValidKeyIndex(testFilePath); // Attempting to fix a valid - should not result in a new file. fixValidIndex(testFilePath); } @Test public void testInvalidKeyIndex() throws Exception { // Try single stripe createTestAcidFile(testFilePath, 100, new BadKeyIndexBuilder()); checkInvalidKeyIndex(testFilePath); // Try fixing, this should result in new fixed file. fixInvalidIndex(testFilePath); // Multiple stripes createTestAcidFile(testFilePath, 12000, new BadKeyIndexBuilder()); checkInvalidKeyIndex(testFilePath); // Try fixing, this should result in new fixed file. fixInvalidIndex(testFilePath); } @Test public void testNonAcidOrcFile() throws Exception { // Copy data/files/alltypesorc to workDir Path baseSrcDir = new Path(System.getProperty("basedir")).getParent(); Path dataFilesPath = new Path(new Path(baseSrcDir, "data"), "files"); File origOrcFile = new File(dataFilesPath.toString(), "alltypesorc"); File testOrcFile = new File(workDir.toString(), "alltypesorc"); FileUtils.copyFile(origOrcFile, testOrcFile); String outputFilename = "fixAcidKeyIndex.out"; File outFile = new File(workDir.toString(), outputFilename); runIndexCheck(new Path(testOrcFile.getPath()), outFile); String outputAsString = FileUtils.readFileToString(outFile); System.out.println(outputAsString); assertTrue(outputAsString.contains("is not an acid file")); } /** * Version of KeyIndexBuilder that generates a valid key index */ static class GoodKeyIndexBuilder extends TestKeyIndexBuilder { GoodKeyIndexBuilder() { super("GoodKeyIndexBuilder"); } @Override public void stopWritingKeyIndex() { // Do nothing - this should generate proper index. } } /** * Bad version of KeyIndexBuilder which builds an invalid acid key index * by not including the key index info once stopWritingKeyIndex() is called. */ static class BadKeyIndexBuilder extends TestKeyIndexBuilder { boolean writeAcidIndexInfo = true; BadKeyIndexBuilder() { super("BadKeyIndexBuilder"); } public void stopWritingKeyIndex() { LOG.info("*** Stop writing index!"); writeAcidIndexInfo = false; } @Override public void preStripeWrite(OrcFile.WriterContext context) throws IOException { LOG.info("*** writeAcidIndexInfo: " + writeAcidIndexInfo); if (!writeAcidIndexInfo) { return; } super.preStripeWrite(context); } } }