/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.orc.impl;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.DiskRange;
import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper;
import org.apache.orc.CompressionCodec;
import org.apache.orc.DataReader;
import org.apache.orc.OrcProto;
import org.apache.orc.StripeInformation;

/**
 * Stateless methods shared between RecordReaderImpl and EncodedReaderImpl.
 */
public class RecordReaderUtils {
  private static final HadoopShims SHIMS = HadoopShims.Factory.get();

  private static class DefaultDataReader implements DataReader {
    private FSDataInputStream file = null;
    private final ByteBufferAllocatorPool pool;
    private HadoopShims.ZeroCopyReaderShim zcr = null;
    private final FileSystem fs;
    private final Path path;
    private final boolean useZeroCopy;
    private final CompressionCodec codec;
    private final int bufferSize;
    private final int typeCount;

    private DefaultDataReader(DefaultDataReader other) {
      this.pool = other.pool;
      this.bufferSize = other.bufferSize;
      this.typeCount = other.typeCount;
      this.fs = other.fs;
      this.path = other.path;
      this.useZeroCopy = other.useZeroCopy;
      this.codec = other.codec;
    }

    private DefaultDataReader(DataReaderProperties properties) {
      this.fs = properties.getFileSystem();
      this.path = properties.getPath();
      this.useZeroCopy = properties.getZeroCopy();
      this.codec = WriterImpl.createCodec(properties.getCompression());
      this.bufferSize = properties.getBufferSize();
      this.typeCount = properties.getTypeCount();
      if (useZeroCopy) {
        this.pool = new ByteBufferAllocatorPool();
      } else {
        this.pool = null;
      }
    }

    @Override
    public void open() throws IOException {
      this.file = fs.open(path);
      if (useZeroCopy) {
        zcr = RecordReaderUtils.createZeroCopyShim(file, codec, pool);
      } else {
        zcr = null;
      }
    }

    @Override
    public OrcIndex readRowIndex(StripeInformation stripe,
                                 OrcProto.StripeFooter footer,
                                 boolean[] included,
                                 OrcProto.RowIndex[] indexes,
                                 boolean[] sargColumns,
                                 OrcProto.BloomFilterIndex[] bloomFilterIndices
                                 ) throws IOException {
      if (file == null) {
        open();
      }
      if (footer == null) {
        footer = readStripeFooter(stripe);
      }
      if (indexes == null) {
        indexes = new OrcProto.RowIndex[typeCount];
      }
      if (bloomFilterIndices == null) {
        bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
      }
      long offset = stripe.getOffset();
      List<OrcProto.Stream> streams = footer.getStreamsList();
      for (int i = 0; i < streams.size(); i++) {
        OrcProto.Stream stream = streams.get(i);
        OrcProto.Stream nextStream = null;
        if (i < streams.size() - 1) {
          nextStream = streams.get(i + 1);
        }
        int col = stream.getColumn();
        int len = (int) stream.getLength();
        // row index stream and bloom filter are interlaced, check if the sarg column contains
        // bloom filter and combine the io to read row index and bloom filters for that column
        // together
        if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
          boolean readBloomFilter = false;
          if (sargColumns != null && sargColumns[col] && nextStream != null &&
              nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
            len += nextStream.getLength();
            i += 1;
            readBloomFilter = true;
          }
          if ((included == null || included[col]) && indexes[col] == null) {
            byte[] buffer = new byte[len];
            file.readFully(offset, buffer, 0, buffer.length);
            ByteBuffer bb = ByteBuffer.wrap(buffer);
            indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
                Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), stream.getLength(),
                codec, bufferSize));
            if (readBloomFilter) {
              bb.position((int) stream.getLength());
              bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
                  "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
                  nextStream.getLength(), codec, bufferSize));
            }
          }
        }
        offset += len;
      }

      OrcIndex index = new OrcIndex(indexes, bloomFilterIndices);
      return index;
    }

    @Override
    public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
      if (file == null) {
        open();
      }
      long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
      int tailLength = (int) stripe.getFooterLength();

      // read the footer
      ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
      file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
      return OrcProto.StripeFooter.parseFrom(InStream.createCodedInputStream("footer",
          Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
          tailLength, codec, bufferSize));
    }

    @Override
    public DiskRangeList readFileData(
        DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException {
      return RecordReaderUtils.readDiskRanges(file, zcr, baseOffset, range, doForceDirect);
    }

    @Override
    public void close() throws IOException {
      if (pool != null) {
        pool.clear();
      }
      // close both zcr and file
      try (HadoopShims.ZeroCopyReaderShim myZcr = zcr) {
        if (file != null) {
          file.close();
        }
      }
    }

    @Override
    public boolean isTrackingDiskRanges() {
      return zcr != null;
    }

    @Override
    public void releaseBuffer(ByteBuffer buffer) {
      zcr.releaseBuffer(buffer);
    }

    @Override
    public DataReader clone() {
      return new DefaultDataReader(this);
    }
  }

  public static DataReader createDefaultDataReader(DataReaderProperties properties) {
    return new DefaultDataReader(properties);
  }

  public static boolean[] findPresentStreamsByColumn(
      List<OrcProto.Stream> streamList, List<OrcProto.Type> types) {
    boolean[] hasNull = new boolean[types.size()];
    for (OrcProto.Stream stream : streamList) {
      if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.PRESENT)) {
        hasNull[stream.getColumn()] = true;
      }
    }
    return hasNull;
  }

  /**
   * Does region A overlap region B? The end points are inclusive on both sides.
   * @param leftA A's left point
   * @param rightA A's right point
   * @param leftB B's left point
   * @param rightB B's right point
   * @return Does region A overlap region B?
   */
  static boolean overlap(long leftA, long rightA, long leftB, long rightB) {
    if (leftA <= leftB) {
      return rightA >= leftB;
    }
    return rightB >= leftA;
  }

  public static void addEntireStreamToRanges(
      long offset, long length, CreateHelper list, boolean doMergeBuffers) {
    list.addOrMerge(offset, offset + length, doMergeBuffers, false);
  }

  public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
      boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
      OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize,
      boolean hasNull, long offset, long length, CreateHelper list, boolean doMergeBuffers) {
    for (int group = 0; group < includedRowGroups.length; ++group) {
      if (!includedRowGroups[group]) continue;
      int posn = getIndexPosition(
          encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull);
      long start = index.getEntry(group).getPositions(posn);
      final long nextGroupOffset;
      boolean isLast = group == (includedRowGroups.length - 1);
      nextGroupOffset = isLast ? length : index.getEntry(group + 1).getPositions(posn);

      start += offset;
      long end = offset + estimateRgEndOffset(
          isCompressed, isLast, nextGroupOffset, length, compressionSize);
      list.addOrMerge(start, end, doMergeBuffers, true);
    }
  }

  public static long estimateRgEndOffset(boolean isCompressed, boolean isLast,
      long nextGroupOffset, long streamLength, int bufferSize) {
    // figure out the worst case last location
    // if adjacent groups have the same compressed block offset then stretch the slop
    // by factor of 2 to safely accommodate the next compression block.
    // One for the current compression block and another for the next compression block.
    long slop = isCompressed ? 2 * (OutStream.HEADER_SIZE + bufferSize)
        : WORST_UNCOMPRESSED_SLOP;
    return isLast ? streamLength : Math.min(streamLength, nextGroupOffset + slop);
  }

  private static final int BYTE_STREAM_POSITIONS = 1;
  private static final int RUN_LENGTH_BYTE_POSITIONS = BYTE_STREAM_POSITIONS + 1;
  private static final int BITFIELD_POSITIONS = RUN_LENGTH_BYTE_POSITIONS + 1;
  private static final int RUN_LENGTH_INT_POSITIONS = BYTE_STREAM_POSITIONS + 1;

  /**
   * Get the offset into the index positions at which the given stream starts
   * for the column.
   * @param columnEncoding the encoding of the column
   * @param columnType the type of the column
   * @param streamType the kind of the stream
   * @param isCompressed is the file compressed
   * @param hasNulls does the column have a PRESENT stream?
   * @return the offset into the row index entry's positions at which this
   *     stream's positions begin
   */
  public static int getIndexPosition(OrcProto.ColumnEncoding.Kind columnEncoding,
                                     OrcProto.Type.Kind columnType,
                                     OrcProto.Stream.Kind streamType,
                                     boolean isCompressed,
                                     boolean hasNulls) {
    if (streamType == OrcProto.Stream.Kind.PRESENT) {
      return 0;
    }
    int compressionValue = isCompressed ? 1 : 0;
    int base = hasNulls ?
        (BITFIELD_POSITIONS + compressionValue) : 0;
    switch (columnType) {
      case BOOLEAN:
      case BYTE:
      case SHORT:
      case INT:
      case LONG:
      case FLOAT:
      case DOUBLE:
      case DATE:
      case STRUCT:
      case MAP:
      case LIST:
      case UNION:
        return base;
      case CHAR:
      case VARCHAR:
      case STRING:
        if (columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
            columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
          return base;
        } else {
          if (streamType == OrcProto.Stream.Kind.DATA) {
            return base;
          } else {
            return base + BYTE_STREAM_POSITIONS + compressionValue;
          }
        }
      case BINARY:
        if (streamType == OrcProto.Stream.Kind.DATA) {
          return base;
        }
        return base + BYTE_STREAM_POSITIONS + compressionValue;
      case DECIMAL:
        if (streamType == OrcProto.Stream.Kind.DATA) {
          return base;
        }
        return base + BYTE_STREAM_POSITIONS + compressionValue;
      case TIMESTAMP:
        if (streamType == OrcProto.Stream.Kind.DATA) {
          return base;
        }
        return base + RUN_LENGTH_INT_POSITIONS + compressionValue;
      default:
        throw new IllegalArgumentException("Unknown type " + columnType);
    }
  }

  // for uncompressed streams, what is the most overlap with the following set
  // of rows (long vint literal group).
  static final int WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512;

  /**
   * Is this stream part of a dictionary?
   * @return is this part of a dictionary?
   */
  public static boolean isDictionary(OrcProto.Stream.Kind kind,
                                     OrcProto.ColumnEncoding encoding) {
    assert kind != OrcProto.Stream.Kind.DICTIONARY_COUNT;
    OrcProto.ColumnEncoding.Kind encodingKind = encoding.getKind();
    return kind == OrcProto.Stream.Kind.DICTIONARY_DATA ||
        (kind == OrcProto.Stream.Kind.LENGTH &&
            (encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
                encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2));
  }

  /**
   * Build a string representation of a list of disk ranges.
   * @param range ranges to stringify
   * @return the resulting string
   */
  public static String stringifyDiskRanges(DiskRangeList range) {
    StringBuilder buffer = new StringBuilder();
    buffer.append("[");
    boolean isFirst = true;
    while (range != null) {
      if (!isFirst) {
        buffer.append(", {");
      } else {
        buffer.append("{");
      }
      isFirst = false;
      buffer.append(range.toString());
      buffer.append("}");
      range = range.next;
    }
    buffer.append("]");
    return buffer.toString();
  }

  /**
   * Read the list of ranges from the file.
   * @param file the file to read
   * @param base the base of the stripe
   * @param range the disk ranges within the stripe to read
   * @return the bytes read for each disk range, which is the same length as
   *    ranges
   * @throws IOException
   */
  static DiskRangeList readDiskRanges(FSDataInputStream file,
                                      HadoopShims.ZeroCopyReaderShim zcr,
                                      long base,
                                      DiskRangeList range,
                                      boolean doForceDirect) throws IOException {
    if (range == null) return null;
    DiskRangeList prev = range.prev;
    if (prev == null) {
      prev = new MutateHelper(range);
    }
    while (range != null) {
      if (range.hasData()) {
        range = range.next;
        continue;
      }
      int len = (int) (range.getEnd() - range.getOffset());
      long off = range.getOffset();
      if (zcr != null) {
        file.seek(base + off);
        boolean hasReplaced = false;
        while (len > 0) {
          ByteBuffer partial = zcr.readBuffer(len, false);
          BufferChunk bc = new BufferChunk(partial, off);
          if (!hasReplaced) {
            range.replaceSelfWith(bc);
            hasReplaced = true;
          } else {
            range.insertAfter(bc);
          }
          range = bc;
          int read = partial.remaining();
          len -= read;
          off += read;
        }
      } else {
        // Don't use HDFS ByteBuffer API because it has no readFully, and is buggy and pointless.
        byte[] buffer = new byte[len];
        file.readFully((base + off), buffer, 0, buffer.length);
        ByteBuffer bb = null;
        if (doForceDirect) {
          bb = ByteBuffer.allocateDirect(len);
          bb.put(buffer);
          bb.position(0);
          bb.limit(len);
        } else {
          bb = ByteBuffer.wrap(buffer);
        }
        range = range.replaceSelfWith(new BufferChunk(bb, range.getOffset()));
      }
      range = range.next;
    }
    return prev.next;
  }

  static List<DiskRange> getStreamBuffers(DiskRangeList range, long offset, long length) {
    // This assumes sorted ranges (as do many other parts of ORC code).
    ArrayList<DiskRange> buffers = new ArrayList<DiskRange>();
    if (length == 0) return buffers;
    long streamEnd = offset + length;
    boolean inRange = false;
    while (range != null) {
      if (!inRange) {
        if (range.getEnd() <= offset) {
          range = range.next;
          continue; // Skip until we are in range.
        }
        inRange = true;
        if (range.getOffset() < offset) {
          // Partial first buffer, add a slice of it.
          buffers.add(range.sliceAndShift(offset, Math.min(streamEnd, range.getEnd()), -offset));
          if (range.getEnd() >= streamEnd) break; // Partial first buffer is also partial last buffer.
          range = range.next;
          continue;
        }
      } else if (range.getOffset() >= streamEnd) {
        break;
      }
      if (range.getEnd() > streamEnd) {
        // Partial last buffer (may also be the first buffer), add a slice of it.
        buffers.add(range.sliceAndShift(range.getOffset(), streamEnd, -offset));
        break;
      }
      // Buffer that belongs entirely to one stream.
      // TODO: ideally we would want to reuse the object and remove it from the list, but we cannot
      //       because bufferChunks is also used by clearStreams for zcr. Create a useless dup.
      buffers.add(range.sliceAndShift(range.getOffset(), range.getEnd(), -offset));
      if (range.getEnd() == streamEnd) break;
      range = range.next;
    }
    return buffers;
  }

  static HadoopShims.ZeroCopyReaderShim createZeroCopyShim(FSDataInputStream file,
      CompressionCodec codec, ByteBufferAllocatorPool pool) throws IOException {
    if ((codec == null || ((codec instanceof DirectDecompressionCodec)
        && ((DirectDecompressionCodec) codec).isAvailable()))) {
      /* codec is null or is available */
      return SHIMS.getZeroCopyReader(file, pool);
    }
    return null;
  }

  // this is an implementation copied from ElasticByteBufferPool in hadoop-2,
  // which lacks a clear()/clean() operation
  public final static class ByteBufferAllocatorPool implements HadoopShims.ByteBufferPoolShim {
    private static final class Key implements Comparable<Key> {
      private final int capacity;
      private final long insertionGeneration;

      Key(int capacity, long insertionGeneration) {
        this.capacity = capacity;
        this.insertionGeneration = insertionGeneration;
      }

      @Override
      public int compareTo(Key other) {
        return ComparisonChain.start().compare(capacity, other.capacity)
            .compare(insertionGeneration, other.insertionGeneration).result();
      }

      @Override
      public boolean equals(Object rhs) {
        if (rhs == null) {
          return false;
        }
        try {
          Key o = (Key) rhs;
          return (compareTo(o) == 0);
        } catch (ClassCastException e) {
          return false;
        }
      }

      @Override
      public int hashCode() {
        return new HashCodeBuilder().append(capacity).append(insertionGeneration).toHashCode();
      }
    }

    private final TreeMap<Key, ByteBuffer> buffers = new TreeMap<Key, ByteBuffer>();

    private final TreeMap<Key, ByteBuffer> directBuffers = new TreeMap<Key, ByteBuffer>();

    private long currentGeneration = 0;

    private final TreeMap<Key, ByteBuffer> getBufferTree(boolean direct) {
      return direct ?
          directBuffers : buffers;
    }

    public void clear() {
      buffers.clear();
      directBuffers.clear();
    }

    @Override
    public ByteBuffer getBuffer(boolean direct, int length) {
      TreeMap<Key, ByteBuffer> tree = getBufferTree(direct);
      Map.Entry<Key, ByteBuffer> entry = tree.ceilingEntry(new Key(length, 0));
      if (entry == null) {
        return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);
      }
      tree.remove(entry.getKey());
      return entry.getValue();
    }

    @Override
    public void putBuffer(ByteBuffer buffer) {
      TreeMap<Key, ByteBuffer> tree = getBufferTree(buffer.isDirect());
      while (true) {
        Key key = new Key(buffer.capacity(), currentGeneration++);
        if (!tree.containsKey(key)) {
          tree.put(key, buffer);
          return;
        }
        // Buffers are indexed by (capacity, generation).
        // If our key is not unique on the first try, we try again
      }
    }
  }
}
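
// --------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the original file).
// A minimal example of how the ByteBufferAllocatorPool above behaves: a buffer
// returned to the pool is handed back out for any later request of equal or
// smaller length, because lookups take the ceiling entry by capacity.
// The class name ByteBufferAllocatorPoolDemo is hypothetical.
class ByteBufferAllocatorPoolDemo {
  public static void main(String[] args) {
    RecordReaderUtils.ByteBufferAllocatorPool pool =
        new RecordReaderUtils.ByteBufferAllocatorPool();

    // First request: the pool is empty, so a fresh heap buffer is allocated.
    java.nio.ByteBuffer first = pool.getBuffer(false, 1024);

    // Return it to the pool; it is indexed by (capacity, insertion generation).
    pool.putBuffer(first);

    // A smaller request is satisfied by the pooled 1024-byte buffer.
    java.nio.ByteBuffer second = pool.getBuffer(false, 512);
    System.out.println("recycled: " + (first == second)); // prints "recycled: true"

    // Drop all pooled buffers (the operation ElasticByteBufferPool lacks).
    pool.clear();
  }
}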